forked from mrlan/EnglishPal
				
			combine_words_through_grammar replaced with stem_words function using nltk stemmer
							parent
							
								
									d6adc70d4f
								
							
						
					
					
						commit
						16ab48d162
					
				|  | @ -7,6 +7,7 @@ | ||||||
| 
 | 
 | ||||||
| import pickle | import pickle | ||||||
| import math | import math | ||||||
|  | from nltk.stem import WordNetLemmatizer | ||||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -75,27 +76,11 @@ def revert_dict(d): | ||||||
|     return d2 |     return d2 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
def stem_words(list_of_words):
    """Reduce each word in *list_of_words* to its root form.

    Examples: ate / eaten -> eat; leaves / leaf -> leaf.

    Lemmatization runs in two passes with WordNet: first with the default
    (noun) part of speech, then with pos='v' so verb inflections such as
    past tense and past participle also collapse to the base verb.

    Returns a new list; the input list is not modified.
    """
    wnl = WordNetLemmatizer()
    roots = []
    for word in list_of_words:
        noun_form = wnl.lemmatize(word)          # noun pass (default POS)
        roots.append(wnl.lemmatize(noun_form, pos='v'))  # verb pass
    return roots
| 
 | 
 | ||||||
| def user_difficulty_level(d_user, d): | def user_difficulty_level(d_user, d): | ||||||
|     d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date |     d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date | ||||||
|  | @ -104,7 +89,7 @@ def user_difficulty_level(d_user, d): | ||||||
|     for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level |     for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level | ||||||
|         lst = d_user2[date] # a list of words |         lst = d_user2[date] # a list of words | ||||||
|         #print(lst) |         #print(lst) | ||||||
|         lst=combine_words_through_grammar(lst,d) #合并单词的不同形式 |         lst = stem_words(lst) # this call returns a list of words reduced to root word | ||||||
|         #print(lst) |         #print(lst) | ||||||
|         lst2 = [] # a list of tuples, (word, difficulty level) |         lst2 = [] # a list of tuples, (word, difficulty level) | ||||||
|         for  word in lst: |         for  word in lst: | ||||||
|  | @ -116,7 +101,7 @@ def user_difficulty_level(d_user, d): | ||||||
|         for t in lst3: |         for t in lst3: | ||||||
|             word = t[0] |             word = t[0] | ||||||
|             hard = t[1] |             hard = t[1] | ||||||
|             #print('WORD %s HARD %4.2f' % (word, hard)) |             print('WORD %s HARD %4.2f' % (word, hard)) | ||||||
|             geometric = geometric * (hard) |             geometric = geometric * (hard) | ||||||
|             count += 1 |             count += 1 | ||||||
|             if count >= 10: |             if count >= 10: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue