Compare commits
	
		
			1 Commit 
		
	
	
		
			master
			...
			Bug476-Zha
		
	
	| Author | SHA1 | Date | 
|---|---|---|
|  | e2785c40a6 | 
|  | @ -7,6 +7,9 @@ | ||||||
| 
 | 
 | ||||||
| import pickle | import pickle | ||||||
| import math | import math | ||||||
|  | from nltk import word_tokenize,pos_tag | ||||||
|  | from nltk.corpus import wordnet | ||||||
|  | from nltk.stem import WordNetLemmatizer | ||||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -74,6 +77,33 @@ def revert_dict(d): | ||||||
|                 d2[date].append(k) |                 d2[date].append(k) | ||||||
|     return d2 |     return d2 | ||||||
| 
 | 
 | ||||||
|  | def get_wordnet_pos(tag): | ||||||
|  |     if tag.startswith('J'): | ||||||
|  |         return wordnet.ADJ | ||||||
|  |     elif tag.startswith('V'): | ||||||
|  |         return wordnet.VERB | ||||||
|  |     elif tag.startswith('N'): | ||||||
|  |        return wordnet.NOUN | ||||||
|  |     elif tag.startswith('R'): | ||||||
|  |         return wordnet.ADV | ||||||
|  |     else: | ||||||
|  |         return None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def combine_words_through_grammar(lst,d): #通过语法合并同一单词的不同形式  | ||||||
|  |     lst1=lst | ||||||
|  |     tagged_sent = pos_tag(lst)     # 获取单词词性 | ||||||
|  |     print(tagged_sent) | ||||||
|  |     wnl = WordNetLemmatizer() | ||||||
|  |     lemmas_sent = [] | ||||||
|  |     for tag in tagged_sent: | ||||||
|  |         wordnet_pos = get_wordnet_pos(tag[1]) or wordnet.NOUN | ||||||
|  |         lemmas_sent.append(wnl.lemmatize(tag[0], pos=wordnet_pos)) # 词形还原 | ||||||
|  |     for index,change_word in enumerate(lemmas_sent): | ||||||
|  |         for word2 in d: | ||||||
|  |             if change_word==word2: | ||||||
|  |                 lst1[index]=change_word | ||||||
|  |     return lst1   | ||||||
| 
 | 
 | ||||||
| def user_difficulty_level(d_user, d): | def user_difficulty_level(d_user, d): | ||||||
|     d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date |     d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date | ||||||
|  | @ -81,6 +111,7 @@ def user_difficulty_level(d_user, d): | ||||||
|     geometric = 1 |     geometric = 1 | ||||||
|     for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level |     for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level | ||||||
|         lst = d_user2[date] # a list of words |         lst = d_user2[date] # a list of words | ||||||
|  |         lst=combine_words_through_grammar(lst,d) #合并单词的不同形式 | ||||||
|         lst2 = [] # a list of tuples, (word, difficulty level) |         lst2 = [] # a list of tuples, (word, difficulty level) | ||||||
|         for  word in lst: |         for  word in lst: | ||||||
|             if word in d: |             if word in d: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue