Compare commits

..

No commits in common. "262604e7614dbb0fbdb374a1b3ea482474f445d0" and "391e859d309d185f62b06234b6e240861a273a0e" have entirely different histories.

3 changed files with 19 additions and 74 deletions

View File

@ -7,7 +7,7 @@
import pickle import pickle
import math import math
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order, map_percentages_to_levels from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order, sort_in_ascending_order
import snowballstemmer import snowballstemmer
@ -94,58 +94,30 @@ def revert_dict(d):
return d2 return d2
def user_difficulty_level(d_user, d, calc_func=0): def user_difficulty_level(d_user, d):
'''
two ways to calculate difficulty_level
set calc_func!=0 to use sqrt, otherwise use weighted average
'''
if calc_func != 0:
# calculation function 1: sqrt
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
geometric = 0
count = 0
for date in sorted(d_user2.keys(),
reverse=True): # most recently added words are more important while determining user's level
lst = d_user2[date] # a list of words
lst2 = [] # a list of tuples, (word, difficulty level)
for word in lst:
if word in d:
lst2.append((word, d[word]))
lst3 = sort_in_ascending_order(lst2) # easiest tuple first
# print(lst3)
for t in lst3:
word = t[0]
hard = t[1]
# print('WORD %s HARD %4.2f' % (word, hard))
geometric = geometric + math.log(hard)
count += 1
return math.exp(geometric / max(count, 1))
# calculation function 2: weighted average
d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date d_user2 = revert_dict(d_user) # key is date, and value is a list of words added in that date
count = {} # number of all kinds of words count = 0
percentages = {} # percentages of all kinds of difficulties geometric = 1
total = 0 # total words for date in sorted(d_user2.keys(),
for date in d_user2.keys(): reverse=True): # most recently added words are more important while determining user's level
lst = d_user2[date] # a list of words lst = d_user2[date] # a list of words
lst2 = [] # a list of tuples, (word, difficulty level)
for word in lst: for word in lst:
if word in d: if word in d:
if d[word] not in count: lst2.append((word, d[word]))
count[d[word]] = 0
count[d[word]] += 1
total += 1
if total == 0: lst3 = sort_in_ascending_order(lst2) # easiest tuple first
return 1 # print(lst3)
for k in count.keys(): for t in lst3:
percentages[k] = count[k] / total word = t[0]
weight = map_percentages_to_levels(percentages) hard = t[1]
sum = 0 # print('WORD %s HARD %4.2f' % (word, hard))
for k in weight.keys(): geometric = geometric * (hard)
sum += weight[k] * k count += 1
return sum if count >= 10:
return geometric ** (1 / count)
return geometric ** (1 / max(count, 1))
def text_difficulty_level(s, d): def text_difficulty_level(s, d):

View File

@ -73,7 +73,6 @@
<button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button> <button type="button" class="btn-close" data-bs-dismiss="alert" aria-label="Close"></button>
</div> </div>
{% endfor %} {% endfor %}
<div class="pagination"> <div class="pagination">
<button class="arrow" id="load_pre_article" onclick="load_pre_article();Reader.stopRead()" title="Previous Article"> <button class="arrow" id="load_pre_article" onclick="load_pre_article();Reader.stopRead()" title="Previous Article">
<i class="fas fa-chevron-left"></i> 上一篇 <i class="fas fa-chevron-left"></i> 上一篇

View File

@ -10,32 +10,6 @@ import operator
import os, sys # 引入模块sys因为我要用里面的sys.argv列表中的信息来读取命令行参数。 import os, sys # 引入模块sys因为我要用里面的sys.argv列表中的信息来读取命令行参数。
import pickle_idea import pickle_idea
def map_percentages_to_levels(percentages):
    '''
    Turn per-difficulty word shares into normalized weights.

    Each difficulty level gets the raw weight
    (10 - level) * share_of_words_at_that_level, and the raw weights are
    then divided by their total so that they add up to 1.

    Input:  dict mapping a difficulty level to the share of words that
            have that difficulty (the original note says levels are 3~8).
    Output: dict with the same keys, in ascending key order, mapping each
            level to its normalized weight.

    If the raw weights add up to 0 (for instance every word is level 10,
    or every share is 0) normalization is impossible; the input shares
    are returned unchanged instead of raising ZeroDivisionError.
    An empty input yields an empty dict.
    '''
    # Ascending key order is kept so the result iterates easiest-first.
    raw_weights = {
        level: (10 - level) * percentages[level]
        for level in sorted(percentages)
    }
    total = sum(raw_weights.values())
    if total == 0:
        # Nothing to normalize by: fall back to the unweighted shares.
        return {level: percentages[level] for level in raw_weights}
    return {level: weight / total for level, weight in raw_weights.items()}
def freq(fruit): def freq(fruit):
''' '''
功能 把字符串转成列表 目的是得到每个单词的频率 功能 把字符串转成列表 目的是得到每个单词的频率