Compare commits
	
		
			3 Commits 
		
	
	
	| Author | SHA1 | Date | 
|---|---|---|
| 
							
							
								
									
								
								 | 
						4da4ec415f | |
| 
							
							
								
									
								
								 | 
						f40a968a17 | |
| 
							
							
								
									
								
								 | 
						59a1fe607a | 
| 
						 | 
					@ -7,6 +7,9 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import pickle
 | 
					import pickle
 | 
				
			||||||
import math
 | 
					import math
 | 
				
			||||||
 | 
					from nltk import word_tokenize,pos_tag
 | 
				
			||||||
 | 
					from nltk.corpus import wordnet
 | 
				
			||||||
 | 
					from nltk.stem import WordNetLemmatizer
 | 
				
			||||||
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
 | 
					from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -74,6 +77,34 @@ def revert_dict(d):
 | 
				
			||||||
                d2[date].append(k)
 | 
					                d2[date].append(k)
 | 
				
			||||||
    return d2
 | 
					    return d2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_wordnet_pos(tag):
					    """Map a part-of-speech tag string to the matching WordNet POS constant.

					    The tag is matched by prefix: 'J' -> wordnet.ADJ, 'V' -> wordnet.VERB,
					    'N' -> wordnet.NOUN, 'R' -> wordnet.ADV. Any other tag yields None.
					    """
					    # (tag prefix, attribute name on the wordnet module), checked in order.
					    prefix_to_attr = (
					        ('J', 'ADJ'),
					        ('V', 'VERB'),
					        ('N', 'NOUN'),
					        ('R', 'ADV'),
					    )
					    for prefix, attr_name in prefix_to_attr:
					        if tag.startswith(prefix):
					            return getattr(wordnet, attr_name)
					    return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def combine_words_through_grammar(lst, d):
					    """Merge different grammatical forms of a word into its lemma.

					    Every word in ``lst`` is POS-tagged with ``pos_tag`` and lemmatized with
					    ``WordNetLemmatizer`` (treated as a noun when ``get_wordnet_pos`` returns
					    None). A word is replaced by its lemma only when that lemma is found in
					    ``d``; otherwise the word is kept unchanged.

					    Args:
					        lst: list of words. It is not modified; a new list is returned.
					        d: container of known words, queried with ``in``.

					    Returns:
					        A new list with the same length and order as ``lst``.
					    """
					    if not lst:
					        # Nothing to tag; also avoids invoking the tagger for no reason.
					        return []

					    # Work on a copy so the caller's list is never mutated in place.
					    merged = list(lst)
					    lemmatizer = WordNetLemmatizer()
					    for index, (token, tag) in enumerate(pos_tag(merged)):  # POS of each word
					        wordnet_pos = get_wordnet_pos(tag) or wordnet.NOUN
					        lemma = lemmatizer.lemmatize(token, pos=wordnet_pos)  # lemmatization
					        # Direct membership test instead of scanning every entry of d.
					        if lemma in d:
					            merged[index] = lemma
					    return merged
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def user_difficulty_level(d_user, d):
 | 
					def user_difficulty_level(d_user, d):
 | 
				
			||||||
    d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
 | 
					    d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
 | 
				
			||||||
| 
						 | 
					@ -81,6 +112,7 @@ def user_difficulty_level(d_user, d):
 | 
				
			||||||
    geometric = 1
 | 
					    geometric = 1
 | 
				
			||||||
    for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
 | 
					    for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
 | 
				
			||||||
        lst = d_user2[date] # a list of words
 | 
					        lst = d_user2[date] # a list of words
 | 
				
			||||||
 | 
					        lst=combine_words_through_grammar(lst,d) #合并单词的不同形式
 | 
				
			||||||
        lst2 = [] # a list of tuples, (word, difficulty level)
 | 
					        lst2 = [] # a list of tuples, (word, difficulty level)
 | 
				
			||||||
        for  word in lst:
 | 
					        for  word in lst:
 | 
				
			||||||
            if word in d:
 | 
					            if word in d:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue