summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHui Lan <lanhui@zjnu.edu.cn>2019-11-02 13:21:51 +0800
committerHui Lan <lanhui@zjnu.edu.cn>2019-11-02 13:21:51 +0800
commit08037e7d5f9be86c6e209a3e5e25c7429a707e3c (patch)
tree2dee4453c441c5b386778a7991ddcd9fd867edb6
parent7a2d14901d26efdaacec21c8d3b8028c0e4b8b6f (diff)
app/difficulty.py: adjust difficulty level to reflect test level
If a word appears in the CET4 test, it has a difficulty level of 4. If a word appears in the CET6 test, it has a difficulty level of 6. If a word does not appear in either of these two tests, then its difficulty level is determined by its frequency. If the word is not in the frequency table, then its level is 1. If the word is in the frequency table, then its level is log2((F+1)/(f+1)), where F is the largest frequency and f is the word's frequency. If f=F, then log2((F+1)/(f+1)) = log2(1) = 0, i.e., its difficulty level is 0. If f<F, then (F+1)/(f+1) > 1, so log2((F+1)/(f+1)) is a positive value.
-rw-r--r--app/difficulty.py12
1 files changed, 6 insertions, 6 deletions
diff --git a/app/difficulty.py b/app/difficulty.py
index 1c8a08f..dbf6cf0 100644
--- a/app/difficulty.py
+++ b/app/difficulty.py
@@ -19,15 +19,15 @@ def load_record(pickle_fname):
def difficulty_level_from_frequency(word, d):
- level = 0
+ level = 1
if not word in d:
return level
if 'what' in d:
ratio = (d['what']+1)/(d[word]+1) # what is a frequent word
- level = math.log( max(ratio, 1), 10)
+ level = math.log( max(ratio, 1), 2)
- level = min(level, 4)
+ level = min(level, 8)
return level
@@ -40,11 +40,11 @@ def get_difficulty_level(d1, d2):
for k in L3:
if k in d2:
if 'CET4' in d2[k]:
- d[k] = 1 # CET4 word has level 1
+ d[k] = 4 # CET4 word has level 4
elif 'CET6' in d2[k]:
- d[k] = 2
+ d[k] = 6
elif 'BBC' in d2[k]:
- d[k] = 4
+ d[k] = 8
if k in d1: # BBC could contain easy words that are not in CET4 or CET6. So 4 is not reasonable. Recompute difficulty level.
d[k] = min(difficulty_level_from_frequency(k, d1), d[k])
elif k in d1: