Merge pull request 'BUG543-JiWenkai' (#153) from BUG543-JiWenkai into Alpha-snapshot20240618

Reviewed-on: #153
2024-08-28 07:50:49 +08:00 · 2024-08-28 07:50:49 +08:00 · 262604e761
parent 391e859d30 61a0b39507
commit 262604e761
3 changed files with 74 additions and 19 deletions
--- a/app/difficulty.py
+++ b/app/difficulty.py
@@ -7,7 +7,7 @@

 import pickle
 import math
-from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
+from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order, map_percentages_to_levels
 import snowballstemmer


@@ -94,10 +94,16 @@ def revert_dict(d):
    return d2


-def user_difficulty_level(d_user, d):
+def user_difficulty_level(d_user, d, calc_func=0):
+    '''
+    two ways to calculate difficulty_level
+    set calc_func!=0 to use sqrt, otherwise use weighted average
+    '''
+    if calc_func != 0:
+        #  calculation function 1: sqrt
        d_user2 = revert_dict(d_user)  # key is date, and value is a list of words added in that date
+        geometric = 0
        count = 0
-    geometric = 1
        for date in sorted(d_user2.keys(),
                           reverse=True):  # most recently added words are more important while determining user's level
            lst = d_user2[date]  # a list of words
@@ -112,12 +118,34 @@ def user_difficulty_level(d_user, d):
                word = t[0]
                hard = t[1]
                # print('WORD %s HARD %4.2f' % (word, hard))
-            geometric = geometric * (hard)
+                geometric = geometric + math.log(hard)
                count += 1
-            if count >= 10:
-                return geometric ** (1 / count)
+        return math.exp(geometric / max(count, 1))
+
+    #  calculation function 2: weighted average
+    d_user2 = revert_dict(d_user)  # key is date, and value is a list of words added in that date
+    count = {}  # number of all kinds of words
+    percentages = {}  # percentages of all kinds of difficulties
+    total = 0  # total words
+    for date in d_user2.keys():
+        lst = d_user2[date]  # a list of words
+        for word in lst:
+            if word in d:
+                if d[word] not in count:
+                    count[d[word]] = 0
+                count[d[word]] += 1
+                total += 1
+
+    if total == 0:
+        return 1
+    for k in count.keys():
+        percentages[k] = count[k] / total
+    weight = map_percentages_to_levels(percentages)
+    sum = 0
+    for k in weight.keys():
+        sum += weight[k] * k
+    return sum

-    return geometric ** (1 / max(count, 1))


 def text_difficulty_level(s, d):
--- a/app/templates/userpage_get.html
+++ b/app/templates/userpage_get.html
@@ -73,6 +73,7 @@
 	<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
    </div>
    {% endfor %}
+
        <div class="pagination">
          <button class="arrow" id="load_pre_article" onclick="load_pre_article();Reader.stopRead()" title="Previous Article">
            <i class="fas fa-chevron-left"></i> 上一篇
--- a/app/wordfreqCMD.py
+++ b/app/wordfreqCMD.py
@@ -10,6 +10,32 @@ import operator
 import os, sys # 引入模块sys，因为我要用里面的sys.argv列表中的信息来读取命令行参数。
 import pickle_idea

+
+def map_percentages_to_levels(percentages):
+    '''
+    Purpose: compute the weights used to score a vocabulary book by weighted-average difficulty; each weight is (10 - difficulty) * (share of words at that difficulty), then all weights are normalized.
+    Input: dict of difficulty shares; keys are difficulty levels 3~8, values are the share of words at each difficulty.
+    Output: dict of weights; keys are difficulty levels 3~8, values are the normalized weight of each difficulty.
+    '''
+    # Keys in ascending order
+    sorted_keys = sorted(percentages.keys())
+
+    # Compute each unnormalized weight and the total of all weights
+    sum = 0  # running total of the unnormalized weights (shadows the builtin sum inside this function)
+    levels_proportions = {}
+    for k in sorted_keys:
+        levels_proportions[k] = 10 - k
+    for k in sorted_keys:
+        levels_proportions[k] *= percentages[k]
+        sum += levels_proportions[k]
+
+    # Normalize the weights so they add up to 1. NOTE(review): raises ZeroDivisionError when every weight is 0 (e.g. all keys equal 10 or all shares are 0) - confirm callers never pass that.
+    for k in sorted_keys:
+        levels_proportions[k] /= sum
+
+    return levels_proportions
+
+
 def freq(fruit):
    '''
    功能： 把字符串转成列表。 目的是得到每个单词的频率。