forked from mrlan/EnglishPal
				
			combine_words_through_grammar replaced with a stem_words function that uses the NLTK WordNetLemmatizer
							parent
							
								
									d6adc70d4f
								
							
						
					
					
						commit
						16ab48d162
					
				|  | @ -7,6 +7,7 @@ | |||
| 
 | ||||
| import pickle | ||||
| import math | ||||
| from nltk.stem import WordNetLemmatizer | ||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | ||||
| 
 | ||||
| 
 | ||||
|  | @ -75,27 +76,11 @@ def revert_dict(d): | |||
|     return d2 | ||||
| 
 | ||||
| 
 | ||||
def combine_words_through_grammar(lst, d):
    """Merge different grammatical forms of the same word.

    For each word in lst, strip one common English suffix
    ('ies' -> 'y'; 'es', 's', 'ed' dropped; 'en' -> 'an') and, if the
    resulting base form is a key of d, replace the word with that base
    form.  Words whose stripped form is not in d are kept unchanged.

    :param lst: list of words to normalise
    :param d: dict whose keys are the known base-form words
    :return: a new list; the caller's list is NOT modified
    """
    # Copy first: the original assigned `lst1 = lst`, which aliased the
    # argument and silently mutated the caller's list in place.
    result = list(lst)
    for index, word in enumerate(lst):
        change_word = ''
        if word.endswith('ies'):       # e.g. flies -> fly
            change_word = word[:-3] + 'y'
        elif word.endswith('es'):      # e.g. boxes -> box
            change_word = word[:-2]
        elif word.endswith('s'):       # e.g. cats -> cat
            change_word = word[:-1]
        elif word.endswith('ed'):      # e.g. watched -> watch
            change_word = word[:-2]
        elif word.endswith('en'):      # e.g. eaten -> eatan (crude heuristic)
            change_word = word[:-2] + 'an'
        # Substitute only when the stripped form is a known word in d
        # (same semantics as the original linear scan over d's keys).
        if change_word in d:
            result[index] = change_word
    return result
def stem_words(list_of_words):
    """Reduce each word to a root form (e.g. ate, eaten -> eat; leaves -> leaf).

    Each word is lemmatized twice with NLTK's WordNetLemmatizer: first with
    the default noun part of speech, then the result is lemmatized again as
    a verb (pos='v').

    :param list_of_words: list of word strings
    :return: new list of lemmatized words, in the same order
    """
    lemmatizer = WordNetLemmatizer()
    roots = []
    for word in list_of_words:
        noun_form = lemmatizer.lemmatize(word)  # default pos is noun
        roots.append(lemmatizer.lemmatize(noun_form, pos='v'))
    return roots
| 
 | ||||
| 
 | ||||
| def user_difficulty_level(d_user, d): | ||||
|     d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date | ||||
|  | @ -104,7 +89,7 @@ def user_difficulty_level(d_user, d): | |||
|     for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level | ||||
|         lst = d_user2[date] # a list of words | ||||
|         #print(lst) | ||||
|         lst=combine_words_through_grammar(lst,d) #合并单词的不同形式 | ||||
|         lst = stem_words(lst) # this call returns a list of words reduced to root word | ||||
|         #print(lst) | ||||
|         lst2 = [] # a list of tuples, (word, difficulty level) | ||||
|         for  word in lst: | ||||
|  | @ -116,7 +101,7 @@ def user_difficulty_level(d_user, d): | |||
|         for t in lst3: | ||||
|             word = t[0] | ||||
|             hard = t[1] | ||||
|             #print('WORD %s HARD %4.2f' % (word, hard)) | ||||
|             print('WORD %s HARD %4.2f' % (word, hard)) | ||||
|             geometric = geometric * (hard) | ||||
|             count += 1 | ||||
|             if count >= 10: | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue