Compare commits
	
		
			1 Commits 
		
	
	
		
			master
			...
			Bug476-Zha
		
	
	| Author | SHA1 | Date | 
|---|---|---|
|  | e2785c40a6 | 
|  | @ -7,6 +7,9 @@ | |||
| 
 | ||||
| import pickle | ||||
| import math | ||||
| from nltk import word_tokenize,pos_tag | ||||
| from nltk.corpus import wordnet | ||||
| from nltk.stem import WordNetLemmatizer | ||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | ||||
| 
 | ||||
| 
 | ||||
|  | @ -74,6 +77,33 @@ def revert_dict(d): | |||
|                 d2[date].append(k) | ||||
|     return d2 | ||||
| 
 | ||||
def get_wordnet_pos(tag):
    """Translate a POS tag into the matching WordNet part-of-speech constant.

    The tag is classified by its leading letter: ``J`` -> adjective,
    ``V`` -> verb, ``N`` -> noun, ``R`` -> adverb.

    Returns:
        The corresponding ``wordnet`` constant, or ``None`` when the tag
        starts with none of those letters.
    """
    # Attribute names are stored as strings so that ``wordnet`` is only
    # touched once a prefix has actually matched.
    prefix_to_attr = (
        ('J', 'ADJ'),
        ('V', 'VERB'),
        ('N', 'NOUN'),
        ('R', 'ADV'),
    )
    for prefix, attr_name in prefix_to_attr:
        if tag.startswith(prefix):
            return getattr(wordnet, attr_name)
    return None
| 
 | ||||
| 
 | ||||
def combine_words_through_grammar(lst, d):
    """Merge different grammatical forms of a word into one known base form.

    Every word in ``lst`` is POS-tagged and lemmatized. A word is replaced by
    its lemma only when that lemma is present in ``d``; otherwise the word is
    kept exactly as given.

    Args:
        lst: list of word strings.
        d: collection of known words, checked with ``in`` (for a dict this
            means its keys).

    Returns:
        A new list with the same length and order as ``lst``. The input list
        is left unmodified.
    """
    tagged_sent = pos_tag(lst)  # (word, POS tag) pairs, one per input word
    wnl = WordNetLemmatizer()
    combined = []
    for original_word, (token, tag) in zip(lst, tagged_sent):
        # Tags that map to no WordNet category are lemmatized as nouns.
        wordnet_pos = get_wordnet_pos(tag) or wordnet.NOUN
        lemma = wnl.lemmatize(token, pos=wordnet_pos)
        # Only collapse to the lemma when it is a known word; an unknown
        # lemma would otherwise discard the word the user actually stored.
        combined.append(lemma if lemma in d else original_word)
    return combined
| 
 | ||||
| def user_difficulty_level(d_user, d): | ||||
|     d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date | ||||
|  | @ -81,6 +111,7 @@ def user_difficulty_level(d_user, d): | |||
|     geometric = 1 | ||||
|     for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level | ||||
|         lst = d_user2[date] # a list of words | ||||
|         lst=combine_words_through_grammar(lst,d) #合并单词的不同形式 | ||||
|         lst2 = [] # a list of tuples, (word, difficulty level) | ||||
|         for  word in lst: | ||||
|             if word in d: | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue