改进评级用户level #44

Closed
zhangweihao wants to merge 6 commits from Bug476-ZhangWeiHao into master
1 changed file with 11 additions and 1 deletion

View File

@ -7,6 +7,7 @@
import pickle
import math
from nltk.stem import WordNetLemmatizer # using WordNetLemmatizer for better performance
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
@ -75,12 +76,21 @@ def revert_dict(d):
return d2
def stem_words(list_of_words):
    """Reduce each word to its root word.

    Examples: ate, eaten -> eat; leaves, leaf -> leaf.

    Every word is lemmatized twice with WordNetLemmatizer: first with the
    lemmatizer's default part of speech, then again as a verb (pos='v').

    Returns a new list holding one root word per input word, in input order.
    """
    lemmatizer = WordNetLemmatizer()
    roots = []
    for word in list_of_words:
        base = lemmatizer.lemmatize(word)  # first pass: default part of speech
        # second pass: stem by verb ('v' represents verb)
        roots.append(lemmatizer.lemmatize(base, pos='v'))
    return roots

@zenovio

Thanks. What if a word is not a verb? Does adding the extra Part of Speech option 'v' affect the outcome?

Hui

@zenovio Thanks. What if a word is not a verb? Does adding the extra Part of Speech option 'v' affect the outcome? Hui
def user_difficulty_level(d_user, d):
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
count = 0
geometric = 1
for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
lst = d_user2[date] # a list of words
#print(lst)
lst = stem_words(lst) # this call returns a list of words reduced to root word
#print(lst)
lst2 = [] # a list of tuples, (word, difficulty level)
for word in lst:
if word in d:
@ -91,7 +101,7 @@ def user_difficulty_level(d_user, d):
for t in lst3:
word = t[0]
hard = t[1]
#print('WORD %s HARD %4.2f' % (word, hard))
print('WORD %s HARD %4.2f' % (word, hard))
geometric = geometric * (hard)
count += 1
if count >= 10: