forked from mrlan/EnglishPal
				
			Merge pull request 'BUG543-JiWenkai' (#153) from BUG543-JiWenkai into Alpha-snapshot20240618
Reviewed-on: mrlan/EnglishPal#153
Bug579-LuKangyang
						commit
						262604e761
					
				| 
						 | 
					@ -7,7 +7,7 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import pickle
 | 
					import pickle
 | 
				
			||||||
import math
 | 
					import math
 | 
				
			||||||
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
 | 
					from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order, map_percentages_to_levels
 | 
				
			||||||
import snowballstemmer
 | 
					import snowballstemmer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -94,10 +94,16 @@ def revert_dict(d):
 | 
				
			||||||
    return d2
 | 
					    return d2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def user_difficulty_level(d_user, d):
 | 
					def user_difficulty_level(d_user, d, calc_func=0):
 | 
				
			||||||
 | 
					    '''
 | 
				
			||||||
 | 
					    two ways to calculate difficulty_level
 | 
				
			||||||
 | 
					    set calc_func!=0 to use sqrt, otherwise use weighted average
 | 
				
			||||||
 | 
					    '''
 | 
				
			||||||
 | 
					    if calc_func != 0:
 | 
				
			||||||
 | 
					        #  calculation function 1: sqrt
 | 
				
			||||||
        d_user2 = revert_dict(d_user)  # key is date, and value is a list of words added in that date
 | 
					        d_user2 = revert_dict(d_user)  # key is date, and value is a list of words added in that date
 | 
				
			||||||
 | 
					        geometric = 0
 | 
				
			||||||
        count = 0
 | 
					        count = 0
 | 
				
			||||||
    geometric = 1
 | 
					 | 
				
			||||||
        for date in sorted(d_user2.keys(),
 | 
					        for date in sorted(d_user2.keys(),
 | 
				
			||||||
                           reverse=True):  # most recently added words are more important while determining user's level
 | 
					                           reverse=True):  # most recently added words are more important while determining user's level
 | 
				
			||||||
            lst = d_user2[date]  # a list of words
 | 
					            lst = d_user2[date]  # a list of words
 | 
				
			||||||
| 
						 | 
					@ -112,12 +118,34 @@ def user_difficulty_level(d_user, d):
 | 
				
			||||||
                word = t[0]
 | 
					                word = t[0]
 | 
				
			||||||
                hard = t[1]
 | 
					                hard = t[1]
 | 
				
			||||||
                # print('WORD %s HARD %4.2f' % (word, hard))
 | 
					                # print('WORD %s HARD %4.2f' % (word, hard))
 | 
				
			||||||
            geometric = geometric * (hard)
 | 
					                geometric = geometric + math.log(hard)
 | 
				
			||||||
                count += 1
 | 
					                count += 1
 | 
				
			||||||
            if count >= 10:
 | 
					        return math.exp(geometric / max(count, 1))
 | 
				
			||||||
                return geometric ** (1 / count)
 | 
					
 | 
				
			||||||
 | 
					    #  calculation function 2: weighted average
 | 
				
			||||||
 | 
					    d_user2 = revert_dict(d_user)  # key is date, and value is a list of words added in that date
 | 
				
			||||||
 | 
					    count = {}  # number of all kinds of words
 | 
				
			||||||
 | 
					    percentages = {}  # percentages of all kinds of difficulties
 | 
				
			||||||
 | 
					    total = 0  # total words
 | 
				
			||||||
 | 
					    for date in d_user2.keys():
 | 
				
			||||||
 | 
					        lst = d_user2[date]  # a list of words
 | 
				
			||||||
 | 
					        for word in lst:
 | 
				
			||||||
 | 
					            if word in d:
 | 
				
			||||||
 | 
					                if d[word] not in count:
 | 
				
			||||||
 | 
					                    count[d[word]] = 0
 | 
				
			||||||
 | 
					                count[d[word]] += 1
 | 
				
			||||||
 | 
					                total += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if total == 0:
 | 
				
			||||||
 | 
					        return 1
 | 
				
			||||||
 | 
					    for k in count.keys():
 | 
				
			||||||
 | 
					        percentages[k] = count[k] / total
 | 
				
			||||||
 | 
					    weight = map_percentages_to_levels(percentages)
 | 
				
			||||||
 | 
					    sum = 0
 | 
				
			||||||
 | 
					    for k in weight.keys():
 | 
				
			||||||
 | 
					        sum += weight[k] * k
 | 
				
			||||||
 | 
					    return sum
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return geometric ** (1 / max(count, 1))
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def text_difficulty_level(s, d):
 | 
					def text_difficulty_level(s, d):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -73,6 +73,7 @@
 | 
				
			||||||
	<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
 | 
						<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
 | 
				
			||||||
    </div>
 | 
					    </div>
 | 
				
			||||||
    {% endfor %}
 | 
					    {% endfor %}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        <div class="pagination">
 | 
					        <div class="pagination">
 | 
				
			||||||
          <button class="arrow" id="load_pre_article" onclick="load_pre_article();Reader.stopRead()" title="Previous Article">
 | 
					          <button class="arrow" id="load_pre_article" onclick="load_pre_article();Reader.stopRead()" title="Previous Article">
 | 
				
			||||||
            <i class="fas fa-chevron-left"></i> 上一篇
 | 
					            <i class="fas fa-chevron-left"></i> 上一篇
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,6 +10,32 @@ import operator
 | 
				
			||||||
import os, sys # 引入模块sys,因为我要用里面的sys.argv列表中的信息来读取命令行参数。
 | 
					import os, sys # 引入模块sys,因为我要用里面的sys.argv列表中的信息来读取命令行参数。
 | 
				
			||||||
import pickle_idea
 | 
					import pickle_idea
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def map_percentages_to_levels(percentages):
					    '''
					    Purpose: turn per-difficulty word proportions into normalised weights
					    used to score a vocabulary list by weighted-average difficulty.
					    Each level k gets the raw weight (10 - k) * percentages[k]; the raw
					    weights are then divided by their total so that they add up to 1.

					    Input: dict whose keys are difficulty levels (3~8 according to the
					    original note) and whose values are the proportion of words at
					    that level.
					    Output: dict with the same keys in ascending order, mapping each level
					    to its normalised weight.

					    Edge cases: an empty dict yields an empty dict. When the raw weights
					    total 0 (every proportion is 0, or every level equals 10) there is
					    nothing to normalise, so the raw all-zero weights are returned
					    instead of raising ZeroDivisionError.
					    '''
					    # Build the raw weights in ascending level order so the result keeps
					    # the same key order as before.
					    raw_weights = {
					        level: (10 - level) * percentages[level]
					        for level in sorted(percentages)
					    }

					    total = sum(raw_weights.values())
					    if total == 0:
					        # Normalising would divide by zero; hand back the raw weights.
					        return raw_weights

					    # Normalise so that the weights sum to 1.
					    return {level: weight / total for level, weight in raw_weights.items()}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def freq(fruit):
 | 
					def freq(fruit):
 | 
				
			||||||
    '''
 | 
					    '''
 | 
				
			||||||
    功能: 把字符串转成列表。 目的是得到每个单词的频率。
 | 
					    功能: 把字符串转成列表。 目的是得到每个单词的频率。
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue