forked from mrlan/EnglishPal
Compare commits
6 Commits
Bug546-Lix
...
Bug476-Zha
Author | SHA1 | Date |
---|---|---|
Nze Avomo Zenovio Ndong | 828f64770e | |
Nze Avomo Zenovio Ndong | 16ab48d162 | |
Nze Avomo Zenovio Ndong | d6adc70d4f | |
Nze Avomo Zenovio Ndong | 5e66a44d5e | |
张伟浩 | bd997d1c66 | |
yzj642 | f553c37af2 |
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
import pickle
|
import pickle
|
||||||
import math
|
import math
|
||||||
|
from nltk.stem import WordNetLemmatizer # using WordNetLemmatizer for better performance
|
||||||
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
|
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
|
||||||
|
|
||||||
|
|
||||||
|
@ -75,12 +76,21 @@ def revert_dict(d):
|
||||||
return d2
|
return d2
|
||||||
|
|
||||||
|
|
||||||
|
def stem_words(list_of_words):
    """Reduce each word to its root word and return the results as a new list.

    Every word is lemmatized twice with WordNetLemmatizer: first with the
    lemmatizer's default part of speech, then again as a verb (pos='v'),
    e.g. ate, eaten -> eat; leaves, leaf -> leaf.
    """
    lemmatizer = WordNetLemmatizer()  # using WordNetLemmatizer for better performance
    roots = []
    for word in list_of_words:
        base = lemmatizer.lemmatize(word)  # first pass: default part of speech
        roots.append(lemmatizer.lemmatize(base, pos='v'))  # second pass: 'v' represents verb
    return roots
|
||||||
|
|
||||||
|
|
||||||
def user_difficulty_level(d_user, d):
|
def user_difficulty_level(d_user, d):
|
||||||
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
|
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
|
||||||
count = 0
|
count = 0
|
||||||
geometric = 1
|
geometric = 1
|
||||||
for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
|
for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
|
||||||
lst = d_user2[date] # a list of words
|
lst = d_user2[date] # a list of words
|
||||||
|
#print(lst)
|
||||||
|
lst = stem_words(lst) # this call returns a list of words reduced to root word
|
||||||
|
#print(lst)
|
||||||
lst2 = [] # a list of tuples, (word, difficulty level)
|
lst2 = [] # a list of tuples, (word, difficulty level)
|
||||||
for word in lst:
|
for word in lst:
|
||||||
if word in d:
|
if word in d:
|
||||||
|
@ -91,7 +101,7 @@ def user_difficulty_level(d_user, d):
|
||||||
for t in lst3:
|
for t in lst3:
|
||||||
word = t[0]
|
word = t[0]
|
||||||
hard = t[1]
|
hard = t[1]
|
||||||
#print('WORD %s HARD %4.2f' % (word, hard))
|
print('WORD %s HARD %4.2f' % (word, hard))
|
||||||
geometric = geometric * (hard)
|
geometric = geometric * (hard)
|
||||||
count += 1
|
count += 1
|
||||||
if count >= 10:
|
if count >= 10:
|
||||||
|
|
Loading…
Reference in New Issue