forked from mrlan/EnglishPal
				
			combine_words_through_grammar replaced with stem_words function using the NLTK WordNetLemmatizer (lemmatization rather than stemming)
							parent
							
								
									d6adc70d4f
								
							
						
					
					
						commit
						16ab48d162
					
				| 
						 | 
					@ -7,6 +7,7 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import pickle
 | 
					import pickle
 | 
				
			||||||
import math
 | 
					import math
 | 
				
			||||||
 | 
					from nltk.stem import WordNetLemmatizer
 | 
				
			||||||
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
 | 
					from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -75,27 +76,11 @@ def revert_dict(d):
 | 
				
			||||||
    return d2
 | 
					    return d2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def combine_words_through_grammar(lst,d): #通过语法合并同一单词的不同形式
 | 
					def stem_words(list_of_words):  # It reduces words to the root word (eg. ate, eaten -> eat; leaves, leaf -> leaf)
 | 
				
			||||||
    lst1=lst
 | 
					    wnl = WordNetLemmatizer()
 | 
				
			||||||
    for index,word in enumerate(lst):
 | 
					    lst1 = [wnl.lemmatize(w) for w in list_of_words]
 | 
				
			||||||
        change_word=''
 | 
					    return [wnl.lemmatize(w, pos='v') for w in lst1]  # stem by verb: 'v' represents verb
 | 
				
			||||||
        if word.endswith('ies'):      #语法条件匹配
 | 
					
 | 
				
			||||||
            change_word=word[:-3]+'y'
 | 
					 | 
				
			||||||
        elif word.endswith('es'):
 | 
					 | 
				
			||||||
            change_word=word[:-2]
 | 
					 | 
				
			||||||
        elif word.endswith('s'):
 | 
					 | 
				
			||||||
            change_word=word[:-1]
 | 
					 | 
				
			||||||
        elif word.endswith('ed'):
 | 
					 | 
				
			||||||
            change_word=word[:-2]
 | 
					 | 
				
			||||||
        elif word.endswith('en'):
 | 
					 | 
				
			||||||
            change_word=word[:-2] + 'an'
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            pass
 | 
					 | 
				
			||||||
        for word2 in d:
 | 
					 | 
				
			||||||
            if change_word==word2:
 | 
					 | 
				
			||||||
                lst1[index]=change_word
 | 
					 | 
				
			||||||
                break
 | 
					 | 
				
			||||||
    return lst1   
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
def user_difficulty_level(d_user, d):
 | 
					def user_difficulty_level(d_user, d):
 | 
				
			||||||
    d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
 | 
					    d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
 | 
				
			||||||
| 
						 | 
					@ -104,7 +89,7 @@ def user_difficulty_level(d_user, d):
 | 
				
			||||||
    for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
 | 
					    for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level
 | 
				
			||||||
        lst = d_user2[date] # a list of words
 | 
					        lst = d_user2[date] # a list of words
 | 
				
			||||||
        #print(lst)
 | 
					        #print(lst)
 | 
				
			||||||
        lst=combine_words_through_grammar(lst,d) #合并单词的不同形式
 | 
					        lst = stem_words(lst) # this call returns a list of words reduced to root word
 | 
				
			||||||
        #print(lst)
 | 
					        #print(lst)
 | 
				
			||||||
        lst2 = [] # a list of tuples, (word, difficulty level)
 | 
					        lst2 = [] # a list of tuples, (word, difficulty level)
 | 
				
			||||||
        for  word in lst:
 | 
					        for  word in lst:
 | 
				
			||||||
| 
						 | 
					@ -116,7 +101,7 @@ def user_difficulty_level(d_user, d):
 | 
				
			||||||
        for t in lst3:
 | 
					        for t in lst3:
 | 
				
			||||||
            word = t[0]
 | 
					            word = t[0]
 | 
				
			||||||
            hard = t[1]
 | 
					            hard = t[1]
 | 
				
			||||||
            #print('WORD %s HARD %4.2f' % (word, hard))
 | 
					            print('WORD %s HARD %4.2f' % (word, hard))
 | 
				
			||||||
            geometric = geometric * (hard)
 | 
					            geometric = geometric * (hard)
 | 
				
			||||||
            count += 1
 | 
					            count += 1
 | 
				
			||||||
            if count >= 10:
 | 
					            if count >= 10:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue