-
Notifications
You must be signed in to change notification settings - Fork 0
/
syn_french.py
57 lines (38 loc) · 1.66 KB
/
syn_french.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from nltk.tokenize import TreebankWordTokenizer #pip install nltk
from nltk.wsd import lesk
from nltk.corpus import wordnet as wn
# Documentation: https://www.nltk.org/howto/wordnet.html
# Explanation: https://www.guru99.com/wordnet-nltk.html
# Choose the language (available codes are listed below):
# ['als', 'arb', 'cat', 'cmn', 'dan', 'eng', 'eus', 'fas',
# 'fin', 'fra', 'fre', 'glg', 'heb', 'ind', 'ita', 'jpn', 'nno',
# 'nob', 'pol', 'por', 'spa', 'tha', 'zsm']
lang = 'fra'

# One tokenizer instance is enough; it is reused for every sentence.
tokenizer = TreebankWordTokenizer()


def _report_sentence(sentence):
    """Tokenize *sentence* and print its Lesk senses and WordNet synonyms.

    Two things are printed:

    1. A list with one entry per token: the noun synset chosen by ``lesk``
       for that token, or ``None`` when no candidate exists.
    2. For every token, one ``(token, lemma_name)`` tuple per lemma name (in
       ``lang``) of every synset WordNet returns for the token in ``lang``.

    Args:
        sentence: Raw text to analyse.
    """
    sent = tokenizer.tokenize(sentence)
    # Give lesk() the candidate synsets looked up in `lang`. Without them the
    # call carries no language information, so the `lang` setting above would
    # not influence which senses are considered.
    # NOTE(review): lesk scores candidates by overlap with synset definitions,
    # which are presumably English glosses - confirm the ranking is useful.
    synsets = [
        lesk(sent, w, 'n', synsets=wn.synsets(w, lang=lang)) for w in sent
    ]
    print(synsets)
    for ws in sent:
        for synset in wn.synsets(ws, lang=lang):
            for ss in synset.lemma_names(lang):
                print((ws, ss), '\n')


# Demo run on a fixed sentence before entering the interactive loop.
_report_sentence("Je voudrai essayer avec cette phrase. Puis, ajouter une phrase; mais aussi pour m'excamer !!!!")
#> ['Je', 'voudrai', 'essayer', 'avec', 'cette', 'phrase.', 'Puis', ',', 'ajouter', 'une', 'phrase', ';', 'mais', 'aussi', 'pour', "m'excamer", '!', '!', '!', '!']

# Interactive loop: analyse each line typed by the user until input ends.
while True:
    print('----------------------------------------')
    try:
        sentence = input()
    except (EOFError, KeyboardInterrupt):
        # End of piped input or Ctrl-C: stop cleanly instead of a traceback.
        break
    _report_sentence(sentence)
# Disabled alternative kept as a bare string literal (it is never executed):
# for each word it prints the word, then each distinct lemma name once
# (duplicates removed with set()).
'''
for word in sent:
print('word = ', word)
Liste_synsets = wn.synsets(word, lang=lang)
for ss in set([n for synset in Liste_synsets for n in synset.lemma_names(lang)]):
print(' ' +ss, '\n')
print('\n')
'''
# This code works well: it finds synonyms in French.
# Alternative method using the "synonymes" library:
#from synonymes import linternaute
#