forked from mrlan/EnglishPal
				
			更新 'app/difficulty.py'
增加一个功能,使生词表中单词的变形(例如复数)被识别为同一个单词,从而准确评定用户的level(使其在判定单词时不受单词变形的影响而误判为高等级词汇)。即把同一个单词的不同形式看作同一个单词。源代码对单词不同形式的处理过于简单,一些复杂形式的过去式变化无法识别,因此我们引入Python的nltk模块来实现单词的词形还原。Bug476-ZhangWeiHao-BaoYuelin
							parent
							
								
									43c719b6b2
								
							
						
					
					
						commit
						e2785c40a6
					
				|  | @ -7,6 +7,9 @@ | |||
| 
 | ||||
| import pickle | ||||
| import math | ||||
| from nltk import word_tokenize,pos_tag | ||||
| from nltk.corpus import wordnet | ||||
| from nltk.stem import WordNetLemmatizer | ||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | ||||
| 
 | ||||
| 
 | ||||
|  | @ -74,6 +77,33 @@ def revert_dict(d): | |||
|                 d2[date].append(k) | ||||
|     return d2 | ||||
| 
 | ||||
def get_wordnet_pos(tag):
    """Translate a POS tag string into the matching WordNet POS constant.

    The tag is matched on its leading letter: ``J`` -> ``wordnet.ADJ``,
    ``V`` -> ``wordnet.VERB``, ``N`` -> ``wordnet.NOUN``,
    ``R`` -> ``wordnet.ADV``.

    Args:
        tag: POS tag string (in this file, the second element of each pair
            returned by ``pos_tag``).

    Returns:
        The corresponding ``wordnet`` constant, or ``None`` when the tag
        starts with none of the four letters.
    """
    # Prefixes are tested in the listed order; the wordnet attribute is only
    # looked up once a prefix has matched.
    prefix_to_attribute = (
        ('J', 'ADJ'),
        ('V', 'VERB'),
        ('N', 'NOUN'),
        ('R', 'ADV'),
    )
    for prefix, attribute in prefix_to_attribute:
        if tag.startswith(prefix):
            return getattr(wordnet, attribute)
    return None
| 
 | ||||
| 
 | ||||
def combine_words_through_grammar(lst, d):
    """Merge different grammatical forms of a word into its base form.

    Every word in ``lst`` is POS-tagged and lemmatized with WordNet. When the
    resulting lemma is present in ``d`` the word is replaced by that lemma;
    otherwise the word is kept unchanged.

    Args:
        lst: list of words to normalize.
        d: collection of known words, tested with ``in`` (the caller in this
            file passes the same object it later indexes by word).

    Returns:
        A new list of the same length as ``lst``. The input list is left
        untouched.
    """
    if not lst:
        # Nothing to tag; avoid loading the tagger and lemmatizer at all.
        return []

    merged = list(lst)  # work on a copy so the caller's list is not mutated
    lemmatizer = WordNetLemmatizer()
    for index, (word, tag) in enumerate(pos_tag(merged)):  # (word, POS tag) pairs
        # Fall back to NOUN when the tag has no WordNet equivalent.
        wordnet_pos = get_wordnet_pos(tag) or wordnet.NOUN
        lemma = lemmatizer.lemmatize(word, pos=wordnet_pos)  # lemmatization
        if lemma in d:
            merged[index] = lemma
    return merged
| 
 | ||||
| def user_difficulty_level(d_user, d): | ||||
|     d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date | ||||
|  | @ -81,6 +111,7 @@ def user_difficulty_level(d_user, d): | |||
|     geometric = 1 | ||||
|     for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level | ||||
|         lst = d_user2[date] # a list of words | ||||
|         lst=combine_words_through_grammar(lst,d) #合并单词的不同形式 | ||||
|         lst2 = [] # a list of tuples, (word, difficulty level) | ||||
|         for  word in lst: | ||||
|             if word in d: | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue