forked from mrlan/EnglishPal
				
			Merge pull request 'BUG543-JiWenkai' (#153) from BUG543-JiWenkai into Alpha-snapshot20240618
Reviewed-on: mrlan/EnglishPal#153
Bug579-LuKangyang
						commit
						262604e761
					
				|  | @ -7,7 +7,7 @@ | ||||||
| 
 | 
 | ||||||
| import pickle | import pickle | ||||||
| import math | import math | ||||||
| from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order | from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order, map_percentages_to_levels | ||||||
| import snowballstemmer | import snowballstemmer | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @ -94,30 +94,58 @@ def revert_dict(d): | ||||||
|     return d2 |     return d2 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def user_difficulty_level(d_user, d): | def user_difficulty_level(d_user, d, calc_func=0): | ||||||
|  |     ''' | ||||||
|  |     two ways to calculate difficulty_level | ||||||
|  |     set calc_func!=0 to use sqrt, otherwise use weighted average | ||||||
|  |     ''' | ||||||
|  |     if calc_func != 0: | ||||||
|  |         #  calculation function 1: sqrt | ||||||
|  |         d_user2 = revert_dict(d_user)  # key is date, and value is a list of words added in that date | ||||||
|  |         geometric = 0 | ||||||
|  |         count = 0 | ||||||
|  |         for date in sorted(d_user2.keys(), | ||||||
|  |                            reverse=True):  # most recently added words are more important while determining user's level | ||||||
|  |             lst = d_user2[date]  # a list of words | ||||||
|  |             lst2 = []  # a list of tuples, (word, difficulty level) | ||||||
|  |             for word in lst: | ||||||
|  |                 if word in d: | ||||||
|  |                     lst2.append((word, d[word])) | ||||||
|  | 
 | ||||||
|  |             lst3 = sort_in_ascending_order(lst2)  # easiest tuple first | ||||||
|  |             # print(lst3) | ||||||
|  |             for t in lst3: | ||||||
|  |                 word = t[0] | ||||||
|  |                 hard = t[1] | ||||||
|  |                 # print('WORD %s HARD %4.2f' % (word, hard)) | ||||||
|  |                 geometric = geometric + math.log(hard) | ||||||
|  |                 count += 1 | ||||||
|  |         return math.exp(geometric / max(count, 1)) | ||||||
|  | 
 | ||||||
|  |     #  calculation function 2: weighted average | ||||||
|     d_user2 = revert_dict(d_user)  # key is date, and value is a list of words added in that date |     d_user2 = revert_dict(d_user)  # key is date, and value is a list of words added in that date | ||||||
|     count = 0 |     count = {}  # number of all kinds of words | ||||||
|     geometric = 1 |     percentages = {}  # percentages of all kinds of difficulties | ||||||
|     for date in sorted(d_user2.keys(), |     total = 0  # total words | ||||||
|                        reverse=True):  # most recently added words are more important while determining user's level |     for date in d_user2.keys(): | ||||||
|         lst = d_user2[date]  # a list of words |         lst = d_user2[date]  # a list of words | ||||||
|         lst2 = []  # a list of tuples, (word, difficulty level) |  | ||||||
|         for word in lst: |         for word in lst: | ||||||
|             if word in d: |             if word in d: | ||||||
|                 lst2.append((word, d[word])) |                 if d[word] not in count: | ||||||
|  |                     count[d[word]] = 0 | ||||||
|  |                 count[d[word]] += 1 | ||||||
|  |                 total += 1 | ||||||
| 
 | 
 | ||||||
|         lst3 = sort_in_ascending_order(lst2)  # easiest tuple first |     if total == 0: | ||||||
|         # print(lst3) |         return 1 | ||||||
|         for t in lst3: |     for k in count.keys(): | ||||||
|             word = t[0] |         percentages[k] = count[k] / total | ||||||
|             hard = t[1] |     weight = map_percentages_to_levels(percentages) | ||||||
|             # print('WORD %s HARD %4.2f' % (word, hard)) |     sum = 0 | ||||||
|             geometric = geometric * (hard) |     for k in weight.keys(): | ||||||
|             count += 1 |         sum += weight[k] * k | ||||||
|             if count >= 10: |     return sum | ||||||
|                 return geometric ** (1 / count) |  | ||||||
| 
 | 
 | ||||||
|     return geometric ** (1 / max(count, 1)) |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def text_difficulty_level(s, d): | def text_difficulty_level(s, d): | ||||||
|  |  | ||||||
|  | @ -73,6 +73,7 @@ | ||||||
| 	<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button> | 	<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button> | ||||||
|     </div> |     </div> | ||||||
|     {% endfor %} |     {% endfor %} | ||||||
|  | 
 | ||||||
|         <div class="pagination"> |         <div class="pagination"> | ||||||
|           <button class="arrow" id="load_pre_article" onclick="load_pre_article();Reader.stopRead()" title="Previous Article"> |           <button class="arrow" id="load_pre_article" onclick="load_pre_article();Reader.stopRead()" title="Previous Article"> | ||||||
|             <i class="fas fa-chevron-left"></i> 上一篇 |             <i class="fas fa-chevron-left"></i> 上一篇 | ||||||
|  |  | ||||||
|  | @ -10,6 +10,32 @@ import operator | ||||||
| import os, sys # 引入模块sys,因为我要用里面的sys.argv列表中的信息来读取命令行参数。 | import os, sys # 引入模块sys,因为我要用里面的sys.argv列表中的信息来读取命令行参数。 | ||||||
| import pickle_idea | import pickle_idea | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | def map_percentages_to_levels(percentages): | ||||||
|  |     ''' | ||||||
|  |     功能:按照加权平均难度,给生词本计算难度分,计算权重的规则是(10 - 该词汇难度) * 该难度词汇占总词汇的比例,再进行归一化处理 | ||||||
|  |     输入:难度占比字典,键代表难度3~8,值代表每种难度的单词的占比 | ||||||
|  |     输出:权重字典,键代表难度3~8,值代表每种难度的单词的权重 | ||||||
|  |     ''' | ||||||
|  |     # 已排序的键 | ||||||
|  |     sorted_keys = sorted(percentages.keys()) | ||||||
|  | 
 | ||||||
|  |     # 计算权重和权重总和 | ||||||
|  |     sum = 0  # 总和 | ||||||
|  |     levels_proportions = {} | ||||||
|  |     for k in sorted_keys: | ||||||
|  |         levels_proportions[k] = 10 - k | ||||||
|  |     for k in sorted_keys: | ||||||
|  |         levels_proportions[k] *= percentages[k] | ||||||
|  |         sum += levels_proportions[k] | ||||||
|  | 
 | ||||||
|  |     # 归一化权重到权重总和为1 | ||||||
|  |     for k in sorted_keys: | ||||||
|  |         levels_proportions[k] /= sum | ||||||
|  | 
 | ||||||
|  |     return levels_proportions | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def freq(fruit): | def freq(fruit): | ||||||
|     ''' |     ''' | ||||||
|     功能: 把字符串转成列表。 目的是得到每个单词的频率。 |     功能: 把字符串转成列表。 目的是得到每个单词的频率。 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue