forked from mrlan/EnglishPal
				
			Compare commits
	
		
			6 Commits 
		
	
	
		
			Bug585-zha
			...
			Bug476-Zha
		
	
	| Author | SHA1 | Date | 
|---|---|---|
|  | 828f64770e | |
|  | 16ab48d162 | |
|  | d6adc70d4f | |
|  | 5e66a44d5e | |
|  | bd997d1c66 | |
|  | f553c37af2 | |
|  | @@ -7,6 +7,7 @@ | ||||||
| 
 | 
 | ||||||
| import pickle | import pickle | ||||||
| import math | import math | ||||||
|  | from nltk.stem import WordNetLemmatizer # using WordNetLemmatizer for better performance | ||||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @@ -75,12 +76,21 @@ def revert_dict(d): | ||||||
|     return d2 |     return d2 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def stem_words(list_of_words):  # It reduces words to the root word (eg. ate, eaten -> eat; leaves, leaf -> leaf) | ||||||
|  |     wnl = WordNetLemmatizer() | ||||||
|  |     lst1 = [wnl.lemmatize(w) for w in list_of_words] | ||||||
|  |     return [wnl.lemmatize(w, pos='v') for w in lst1]  # stem by verb: 'v' represents verb | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def user_difficulty_level(d_user, d): | def user_difficulty_level(d_user, d): | ||||||
|     d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date |     d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date | ||||||
|     count = 0 |     count = 0 | ||||||
|     geometric = 1 |     geometric = 1 | ||||||
|     for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level |     for date in sorted(d_user2.keys(), reverse=True): # most recently added words are more important while determining user's level | ||||||
|         lst = d_user2[date] # a list of words |         lst = d_user2[date] # a list of words | ||||||
|  |         #print(lst) | ||||||
|  |         lst = stem_words(lst) # this call returns a list of words reduced to root word | ||||||
|  |         #print(lst) | ||||||
|         lst2 = [] # a list of tuples, (word, difficulty level) |         lst2 = [] # a list of tuples, (word, difficulty level) | ||||||
|         for  word in lst: |         for  word in lst: | ||||||
|             if word in d: |             if word in d: | ||||||
|  | @@ -91,7 +101,7 @@ def user_difficulty_level(d_user, d): | ||||||
|         for t in lst3: |         for t in lst3: | ||||||
|             word = t[0] |             word = t[0] | ||||||
|             hard = t[1] |             hard = t[1] | ||||||
|             #print('WORD %s HARD %4.2f' % (word, hard)) |             print('WORD %s HARD %4.2f' % (word, hard)) | ||||||
|             geometric = geometric * (hard) |             geometric = geometric * (hard) | ||||||
|             count += 1 |             count += 1 | ||||||
|             if count >= 10: |             if count >= 10: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue