forked from mrlan/EnglishPal
Compare commits
No commits in common. "BaoYuelin" and "master" have entirely different histories.
|
@ -7,9 +7,6 @@
|
||||||
|
|
||||||
import pickle
|
import pickle
|
||||||
import math
|
import math
|
||||||
from nltk import word_tokenize,pos_tag
|
|
||||||
from nltk.corpus import wordnet
|
|
||||||
from nltk.stem import WordNetLemmatizer
|
|
||||||
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
|
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
|
||||||
|
|
||||||
|
|
||||||
|
@ -77,34 +74,6 @@ def revert_dict(d):
|
||||||
d2[date].append(k)
|
d2[date].append(k)
|
||||||
return d2
|
return d2
|
||||||
|
|
||||||
def get_wordnet_pos(tag):
    """Translate a part-of-speech tag into the matching WordNet POS constant.

    Only the leading letter of ``tag`` matters:

    * ``J`` -> ``wordnet.ADJ``
    * ``V`` -> ``wordnet.VERB``
    * ``N`` -> ``wordnet.NOUN``
    * ``R`` -> ``wordnet.ADV``

    Any other tag, including an empty string, yields ``None`` so the caller
    can choose its own fallback.
    """
    if tag.startswith('J'):
        return wordnet.ADJ
    if tag.startswith('V'):
        return wordnet.VERB
    if tag.startswith('N'):
        return wordnet.NOUN
    if tag.startswith('R'):
        return wordnet.ADV
    return None
|
|
||||||
|
|
||||||
|
|
||||||
def combine_words_through_grammar(lst, d):
    """Merge different grammatical forms of a word into one known base form.

    Every word in ``lst`` is POS-tagged and lemmatized.  When the resulting
    lemma is present in ``d`` the word is replaced by that lemma; otherwise
    the word is kept exactly as given.

    Args:
        lst: list of words (tokens) to normalize.
        d: collection of known words, used only for membership tests.

    Returns:
        A new list with the same length and order as ``lst``.  The input
        list is left unmodified.
    """
    tagged_sent = pos_tag(lst)  # (word, part-of-speech tag) pairs
    wnl = WordNetLemmatizer()
    # Work on a copy so the caller's list is not mutated behind its back.
    merged = list(lst)
    for index, (word, tag) in enumerate(tagged_sent):
        # Tags without a WordNet counterpart are lemmatized as nouns.
        wordnet_pos = get_wordnet_pos(tag) or wordnet.NOUN
        lemma = wnl.lemmatize(word, pos=wordnet_pos)
        # Only substitute lemmas that are known words in d.
        if lemma in d:
            merged[index] = lemma
    return merged
|
|
||||||
|
|
||||||
|
|
||||||
def user_difficulty_level(d_user, d):
|
def user_difficulty_level(d_user, d):
|
||||||
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
|
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
|
||||||
|
@ -112,7 +81,6 @@ def user_difficulty_level(d_user, d):
|
||||||
geometric = 1
|
geometric = 1
|
||||||
for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
|
for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
|
||||||
lst = d_user2[date] # a list of words
|
lst = d_user2[date] # a list of words
|
||||||
lst=combine_words_through_grammar(lst,d) #合并单词的不同形式
|
|
||||||
lst2 = [] # a list of tuples, (word, difficulty level)
|
lst2 = [] # a list of tuples, (word, difficulty level)
|
||||||
for word in lst:
|
for word in lst:
|
||||||
if word in d:
|
if word in d:
|
||||||
|
|
Loading…
Reference in New Issue