1
0
Fork 0

Compare commits

...

4 Commits

14 changed files with 187 additions and 175 deletions

View File

@ -22,12 +22,12 @@ def total_number_of_essays():
return len(result)
def get_article_title(article):
    """
    Return the title of an article.

    :param article: full article text; its first line is the title
    :return: the text before the first newline (the whole string when there is no newline)
    """
    # Only the first line is needed, so stop splitting after the first newline.
    return article.split('\n', 1)[0]
def get_article_body(article):
    """
    Return the body of an article, i.e. everything after its first line.

    :param article: full article text; its first line is the title
    :return: the text following the first newline, or '' when there is none
    """
    # partition() drops the first line without building and re-joining a list of lines.
    _title, _newline, body = article.partition('\n')
    return body
@ -111,11 +111,11 @@ def within_range(x, y, r):
return x > y and abs(x - y) <= r
def get_question_part(s):
s = s.strip()
def get_question_part(article):
article = article.strip()
result = []
flag = 0
for line in s.split('\n'):
for line in article.split('\n'):
line = line.strip()
if line == 'QUESTION':
result.append(line)
@ -127,11 +127,11 @@ def get_question_part(s):
return '\n'.join(result)
def get_answer_part(s):
s = s.strip()
def get_answer_part(article):
article = article.strip()
result = []
flag = 0
for line in s.split('\n'):
for line in article.split('\n'):
line = line.strip()
if line == 'ANSWER':
flag = 1

View File

@ -3,23 +3,26 @@ import string
from datetime import datetime, timedelta
from UseSqlite import InsertQuery, RecordQuery
def md5(s):
    """
    Return the MD5 digest of a string.

    :param s: text to hash; it is encoded as UTF-8 before hashing
    :return: the digest as a 32-character lowercase hexadecimal string
    """
    # NOTE(review): MD5 is not a safe password hash; this helper presumably feeds
    # stored passwords -- confirm against the callers before relying on it.
    return hashlib.md5(s.encode(encoding='utf-8')).hexdigest()
# import model.user after the definition of md5(s) to avoid a circular import
from model.user import get_user_by_username, insert_user, update_password_by_username
path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './' # comment this line in deployment
def verify_pass(newpass,oldpass):
if(newpass==oldpass):
def verify_pass(new_password, old_password):
if new_password == old_password:
return True
@ -43,17 +46,17 @@ def check_username_availability(username):
def change_password(username, old_password, new_password):
    """
    Change a user's password.

    :param username: user name
    :param old_password: current password, checked with verify_user
    :param new_password: replacement password
    :return: True when the password was updated, otherwise False
    """
    if not verify_user(username, old_password):  # the old password is wrong
        return False
    if verify_pass(new_password, old_password):  # new and old passwords are identical
        return False
    # NOTE(review): the original comment states that user name and password are hashed
    # together so that equal passwords of different users do not look alike; that
    # presumably happens inside update_password_by_username -- confirm.
    update_password_by_username(username, new_password)
    return True
@ -66,6 +69,7 @@ def get_expiry_date(username):
else:
return user.expiry_date
class UserName:
def __init__(self, username):
self.username = username
@ -73,11 +77,11 @@ class UserName:
def validate(self):
if len(self.username) > 20:
return f'{self.username} is too long. The user name cannot exceed 20 characters.'
if self.username.startswith('.'): # a user name must not start with a dot
if self.username.startswith('.'): # a user name must not start with a dot
return 'Period (.) is not allowed as the first letter in the user name.'
if ' ' in self.username: # a user name must not include a whitespace
if ' ' in self.username: # a user name must not include a whitespace
return 'Whitespace is not allowed in the user name.'
for c in self.username: # a user name must not include special characters, except non-leading periods or underscores
for c in self.username: # a user name must not include special characters, except non-leading periods or underscores
if c in string.punctuation and c != '.' and c != '_':
return f'{c} is not allowed in the user name.'
if self.username in ['signup', 'login', 'logout', 'reset', 'mark', 'back', 'unfamiliar', 'familiar', 'del', 'admin']:

View File

@ -9,6 +9,7 @@
import sqlite3
class Sqlite3Template:
def __init__(self, db_fname):
self.db_fname = db_fname
@ -72,7 +73,6 @@ class RecordQuery(Sqlite3Template):
return result
if __name__ == '__main__':
#iq = InsertQuery('RiskDB.db')

View File

@ -6,6 +6,7 @@
from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order
import string
class WordFreq:
def __init__(self, s):
self.s = remove_punctuation(s)

View File

@ -1,10 +1,10 @@
'''
"""
Yaml.py
配置文件包括:
./static/config.yml
./layout/partial/header.html
./layout/partial/footer.html
'''
"""
import yaml as YAML
import os
@ -15,7 +15,7 @@ ymlPath = path_prefix + 'static/config.yml'
# partial文件夹路径
partialPath = path_prefix + 'layout/partial/'
f = open(ymlPath, 'r', encoding='utf-8') # 以'UTF-8'格式打开YAML文件
f = open(ymlPath, 'r', encoding='utf-8') # 以'UTF-8'格式打开YAML文件
cont = f.read() # 以文本形式读取YAML
yml = YAML.load(cont, Loader=YAML.FullLoader) # 加载YAML

View File

@ -8,10 +8,10 @@ accountService = Blueprint("accountService", __name__)
### Sign-up, login, logout ###
@accountService.route("/signup", methods=['GET', 'POST'])
def signup():
'''
"""
注册
:return: 根据注册是否成功返回不同界面
'''
"""
if request.method == 'GET':
# GET方法直接返回注册页面
return render_template('signup.html')
@ -19,12 +19,12 @@ def signup():
# POST方法需判断是否注册成功再根据结果返回不同的内容
username = escape(request.form['username'])
password = escape(request.form['password'])
#! 添加如下代码为了过滤注册时的非法字符
# ! 添加如下代码为了过滤注册时的非法字符
warn = WarningMessage(username)
if str(warn) != 'OK':
return jsonify({'status': '3', 'warn': str(warn)})
available = check_username_availability(username)
if not available: # 用户名不可用
return jsonify({'status': '0'})
@ -43,13 +43,12 @@ def signup():
return jsonify({'status': '1'})
@accountService.route("/login", methods=['GET', 'POST'])
def login():
'''
"""
登录
:return: 根据登录是否成功返回不同页面
'''
"""
if request.method == 'GET':
# GET请求
return render_template('login.html')
@ -74,10 +73,10 @@ def login():
@accountService.route("/logout", methods=['GET', 'POST'])
def logout():
    """
    Log the current user out.

    :return: a redirect to the main page
    """
    # Mark the session as logged out.
    session['logged_in'] = False
    return redirect(url_for('mainpage'))
@ -85,10 +84,10 @@ def logout():
@accountService.route("/reset", methods=['GET', 'POST'])
def reset():
'''
"""
重设密码
:return: 返回适当的页面
'''
"""
# 下列方法用于防止未登录状态下的修改密码
if not session.get('logged_in'):
return render_template('login.html')
@ -102,9 +101,9 @@ def reset():
# POST请求用于提交修改后信息
old_password = escape(request.form['old-password'])
new_password = escape(request.form['new-password'])
flag = change_password(username, old_password, new_password) # flag表示是否修改成功
flag = change_password(username, old_password, new_password) # flag表示是否修改成功
if flag:
session['logged_in'] = False
return jsonify({'status':'1'}) # 修改成功
return jsonify({'status': '1'}) # 修改成功
else:
return jsonify({'status':'2'}) # 修改失败
return jsonify({'status': '2'}) # 修改失败

View File

@ -52,7 +52,7 @@ def article():
max(1, int(request.args.get("page", 1))), _article_number // _page_size + (_article_number % _page_size > 0)
) # 最小的page是1
except ValueError:
return "page parmas must be int!"
return "page params must be int!"
_articles = get_page_articles(_cur_page, _page_size)
for article in _articles: # 获取每篇文章的title

View File

@ -18,25 +18,25 @@ def load_record(pickle_fname):
return d
def convert_test_type_to_difficulty_level(words_dict):
    """
    Rate the difficulty of every word in the word bank.

    :param words_dict: dictionary loaded from the word-bank pickle file; for each word
        it holds the test types the word belongs to (e.g., CET4, CET6, BBC)
    :return: dictionary mapping word to difficulty level, e.g. {'apple': 4, ...};
        a word whose test types match none of the names below is left out
    """
    result = {}
    for word, test_types in words_dict.items():
        # The first matching branch wins, so a word gets the lowest applicable level.
        if 'CET4' in test_types:
            result[word] = 4  # CET4 word has level 4
        elif 'OXFORD3000' in test_types:
            result[word] = 5
        elif 'CET6' in test_types or 'GRADUATE' in test_types:
            result[word] = 6
        elif 'OXFORD5000' in test_types or 'IELTS' in test_types:
            result[word] = 7
        elif 'BBC' in test_types:
            result[word] = 8
    return result
@ -65,10 +65,10 @@ def get_difficulty_level_for_user(d1, d2):
def revert_dict(d):
'''
"""
In d, word is the key, and value is a list of dates.
In d2 (the returned value of this function), time is the key, and the value is a list of words picked at that time.
'''
"""
d2 = {}
for k in d:
if type(d[k]) is list: # d[k] is a list of dates.
@ -80,7 +80,7 @@ def revert_dict(d):
for time_info in lst:
date = time_info[:10] # until hour
if not date in d2:
if date not in d2:
d2[date] = [k]
else:
d2[date].append(k)
@ -105,7 +105,7 @@ def user_difficulty_level(d_user, d):
word = t[0]
hard = t[1]
# print('WORD %s HARD %4.2f' % (word, hard))
geometric = geometric * (hard)
geometric = geometric * hard
count += 1
if count >= 10:
return geometric ** (1 / count)
@ -131,7 +131,7 @@ def text_difficulty_level(s, d):
for t in lst2:
word = t[0]
hard = t[1]
geometric = geometric * (hard)
geometric = geometric * hard
count += 1
if count >= 20: # we look for n most difficult words
return geometric ** (1 / count)

View File

@ -23,33 +23,34 @@ app.register_blueprint(adminService)
path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './' # comment this line in deployment
def get_random_image(path):
    """
    Pick a random JPEG image from a directory.

    :param path: directory that is searched for files matching *.jpg
    :return: the chosen file path, cut so that it starts at its last '/static';
        the full path when it contains no '/static'
    :raises IndexError: when the directory holds no *.jpg file
    """
    img_path = random.choice(glob.glob(os.path.join(path, '*.jpg')))
    static_at = img_path.rfind('/static')
    if static_at == -1:
        # Slicing from -1 would return only the last character of the path.
        return img_path
    return img_path[static_at:]
def get_random_ads():
    """
    Return a random advertisement slogan.

    :return: one of the three plain-text slogans below, chosen at random
    """
    slogans = ['个性化分析精准提升', '你的专有单词本', '智能捕捉阅读弱点,针对性提高你的阅读水平']
    return random.choice(slogans)
def appears_in_test(word, d):
    """
    List the entries stored for a word as one comma-separated string.

    :param word: the word to look up
    :param d: container indexed by word whose values are iterables of strings
    :return: the strings in d[word] joined by ',', or '' when word is not in d
    """
    if word not in d:
        return ''
    return ','.join(d[word])
@ -57,36 +58,36 @@ def appears_in_test(word, d):
@app.route("/mark", methods=['GET', 'POST'])
def mark_word():
    """
    Add the words marked on the main page to the shared frequency history.

    :return: on POST, a redirect to the main page; for any other method the
        placeholder text 'Under construction'
    """
    if request.method != 'POST':  # GET requests are not served
        return 'Under construction'
    frequency_file = path_prefix + 'static/frequency/frequency.p'
    d = load_freq_history(frequency_file)
    lst_history = pickle_idea.dict_to_lst(d)
    # Every marked word counts as one more occurrence.
    lst = [(word, 1) for word in request.form.getlist('marked')]
    d = pickle_idea.merge_frequency(lst, lst_history)
    pickle_idea.save_frequency_to_pickle(d, frequency_file)
    return redirect(url_for('mainpage'))
@app.route("/", methods=['GET', 'POST'])
def mainpage():
'''
"""
根据GET或POST方法来返回不同的主界面
:return: 主界面
'''
"""
if request.method == 'POST': # when we submit a form
content = escape(request.form['content'])
f = WordFreq(content)
lst = f.get_freq()
# save history
d = load_freq_history(path_prefix + 'static/frequency/frequency.p')
lst_history = pickle_idea.dict2lst(d)
lst_history = pickle_idea.dict_to_lst(d)
d = pickle_idea.merge_frequency(lst, lst_history)
pickle_idea.save_frequency_to_pickle(d, path_prefix + 'static/frequency/frequency.p')
return render_template('mainpage_post.html', lst=lst, yml=Yaml.yml)
@ -96,8 +97,8 @@ def mainpage():
number_of_essays = total_number_of_essays()
d = load_freq_history(path_prefix + 'static/frequency/frequency.p')
d_len = len(d)
lst = sort_in_descending_order(pickle_idea.dict2lst(d))
return render_template('mainpage_get.html',
lst = sort_in_descending_order(pickle_idea.dict_to_lst(d))
return render_template('mainpage_get.html',
admin_name=ADMIN_NAME,
random_ads=random_ads,
d_len=d_len,

View File

@ -10,29 +10,29 @@ import pickle
from datetime import datetime
def lst_to_dict(lst, d):
    """
    Store the information in list lst to dictionary d.

    :param lst: sequence of items whose first element is a word and whose second
        element is that word's frequency, e.g. [('apple', 2), ('banana', 1)]
    :param d: dictionary updated in place; frequencies of repeated words are added up
    Note: nothing is returned.
    """
    for item in lst:
        word, freq = item[0], item[1]
        if word in d:
            d[word] += freq
        else:
            d[word] = freq
def dict_to_lst(d):
    """
    Convert a dictionary to a list of (key, value) pairs.

    :param d: dictionary to convert
    :return: a new list of (key, value) tuples
    """
    return list(d.items())
def merge_frequency(list1, list2):
    """
    Merge two frequency lists into one dictionary.

    :param list1: list of (word, frequency) pairs
    :param list2: list of (word, frequency) pairs
    :return: a new dictionary built by feeding both lists to lst_to_dict
    """
    d = {}
    for pairs in (list1, list2):
        lst_to_dict(pairs, d)
    return d
@ -54,33 +54,35 @@ def save_frequency_to_pickle(d, pickle_fname):
pickle.dump(d2, f)
f.close()
def unfamiliar(path, word):
    """
    Append the current time to the record of a word in a pickle file.

    :param path: path of the pickle file holding a dictionary of word -> list
    :param word: word whose list receives a new 'YYYYmmddHHMM' timestamp
    :raises KeyError: when word is not in the stored dictionary
    """
    # NOTE(review): pickle.load must only be used on files this application wrote itself.
    with open(path, "rb") as f:
        dic = pickle.load(f)
    dic[word] += [datetime.now().strftime('%Y%m%d%H%M')]
    # The read handle is closed before the file is reopened for writing, and the
    # write handle is closed on exit so the data is flushed to disk.
    with open(path, "wb") as fp:
        pickle.dump(dic, fp)
def familiar(path, word):
    """
    Remove the oldest entry of a word from the record in a pickle file.

    When the word has more than one entry, its first entry is deleted;
    otherwise the word is removed from the dictionary altogether.

    :param path: path of the pickle file holding a dictionary of word -> list
    :param word: word to update
    :raises KeyError: when word is not in the stored dictionary
    """
    # NOTE(review): pickle.load must only be used on files this application wrote itself.
    with open(path, "rb") as f:
        dic = pickle.load(f)
    if len(dic[word]) > 1:
        del dic[word][0]
    else:
        dic.pop(word)
    # Closing the write handle on exit guarantees the data reaches the disk.
    with open(path, "wb") as fp:
        pickle.dump(dic, fp)
if __name__ == '__main__':
lst1 = [('apple',2), ('banana',1)]
lst1 = [('apple', 2), ('banana', 1)]
d = {}
lst2dict(lst1, d) # d will change
save_frequency_to_pickle(d, 'frequency.p') # frequency.p is our database
lst_to_dict(lst1, d) # d will change
save_frequency_to_pickle(d, 'frequency.p') # frequency.p is our database
lst2 = [('banana',2), ('orange', 4)]
lst2 = [('banana', 2), ('orange', 4)]
d = load_record('frequency.p')
lst1 = dict2lst(d)
lst1 = dict_to_lst(d)
d = merge_frequency(lst2, lst1)
print(d)

View File

@ -11,21 +11,23 @@
import pickle
from datetime import datetime
def lst_to_dict(lst, d):
    """
    Store the information in list lst to dictionary d.

    :param lst: sequence of items whose first element is a word and whose second
        element is a list of dates, e.g. [('apple', ['201910251437'])]
    :param d: dictionary updated in place; dates of repeated words are concatenated
    Note: nothing is returned.
    """
    for item in lst:
        word, dates = item[0], item[1]
        if word in d:
            d[word] += dates
        else:
            # Store a copy: keeping the caller's list itself would let a later
            # `+=` for the same word modify the caller's data.
            d[word] = list(dates)
def deleteRecord(path,word):
def delete_record(path, word):
with open(path, 'rb') as f:
db = pickle.load(f)
try:
@ -33,9 +35,10 @@ def deleteRecord(path,word):
except KeyError:
print("sorry")
with open(path, 'wb') as ff:
pickle.dump(db, ff)
pickle.dump(db, ff)
def dict2lst(d):
def dict_to_lst(d):
if len(d) > 0:
keys = list(d.keys())
if isinstance(d[keys[0]], int):
@ -44,14 +47,15 @@ def dict2lst(d):
lst.append((k, [datetime.now().strftime('%Y%m%d%H%M')]))
return lst
elif isinstance(d[keys[0]], list):
return list(d.items()) # a list of (key, value) pairs
return list(d.items()) # a list of (key, value) pairs
return []
def merge_frequency(lst1, lst2):
    """
    Merge two lists of (word, dates) pairs into one dictionary.

    :param lst1: list of (word, list of dates) pairs
    :param lst2: list of (word, list of dates) pairs
    :return: a new dictionary built by feeding both lists to lst_to_dict
    """
    d = {}
    for pairs in (lst1, lst2):
        lst_to_dict(pairs, d)
    return d
@ -67,23 +71,22 @@ def save_frequency_to_pickle(d, pickle_fname):
exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w']
d2 = {}
for k in d:
if not k in exclusion_lst and not k.isnumeric() and not len(k) < 2:
d2[k] = list(sorted(d[k])) # 原先这里是d2[k] = list(sorted(set(d[k])))
if k not in exclusion_lst and not k.isnumeric() and not len(k) < 2:
d2[k] = list(sorted(d[k])) # 原先这里是d2[k] = list(sorted(set(d[k])))
pickle.dump(d2, f)
f.close()
if __name__ == '__main__':
lst1 = [('apple',['201910251437', '201910251438']), ('banana',['201910251439'])]
lst1 = [('apple',['201910251437', '201910251438']), ('banana', ['201910251439'])]
d = {}
lst2dict(lst1, d) # d will change
lst_to_dict(lst1, d) # d will change
save_frequency_to_pickle(d, 'frequency.p') # frequency.p is our database
lst2 = [('banana',['201910251439']), ('orange', ['201910251440', '201910251439'])]
d = load_record('frequency.p')
lst1 = dict2lst(d)
lst1 = dict_to_lst(d)
d = merge_frequency(lst2, lst1)
print(d)

View File

@ -21,6 +21,7 @@ userService = Blueprint("user_bp", __name__)
path_prefix = '/var/www/wordfreq/wordfreq/'
path_prefix = './' # comment this line in deployment
@userService.route("/get_next_article/<username>",methods=['GET','POST'])
def get_next_article(username):
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
@ -42,13 +43,14 @@ def get_next_article(username):
return 'Under construction'
return json.dumps(data)
@userService.route("/get_pre_article/<username>",methods=['GET'])
def get_pre_article(username):
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
if request.method == 'GET':
visited_articles = session.get("visited_articles")
if(visited_articles["index"]==0):
data=''
if visited_articles["index"] == 0:
data = ''
else:
visited_articles["index"] -= 1 # 上一篇index-=1
if visited_articles['article_ids'][-1] == "null": # 如果当前还是“null”则将“null”pop出来
@ -58,19 +60,20 @@ def get_pre_article(username):
data = {
'visited_articles': visited_articles,
'today_article': today_article,
'result_of_generate_article':result_of_generate_article
'result_of_generate_article': result_of_generate_article
}
return json.dumps(data)
@userService.route("/<username>/<word>/unfamiliar", methods=['GET', 'POST'])
def unfamiliar(username, word):
'''
"""
:param username:
:param word:
:return:
'''
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
"""
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % username
pickle_idea.unfamiliar(user_freq_record, word)
session['thisWord'] = word # 1. put a word into session
session['time'] = 1
@ -79,13 +82,13 @@ def unfamiliar(username, word):
@userService.route("/<username>/<word>/familiar", methods=['GET', 'POST'])
def familiar(username, word):
'''
"""
:param username:
:param word:
:return:
'''
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
"""
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % username
pickle_idea.familiar(user_freq_record, word)
session['thisWord'] = word # 1. put a word into session
session['time'] = 1
@ -93,15 +96,15 @@ def familiar(username, word):
@userService.route("/<username>/<word>/del", methods=['GET', 'POST'])
def delete_word(username, word):
    """
    Delete a word from a user's word list.

    :param username: user name
    :param word: word to delete
    :return: the literal string "success"
    """
    user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % username
    pickle_idea2.delete_record(user_freq_record, word)
    # flash() is deliberately not called here. The template userpage_get.html deletes
    # words asynchronously while flashed messages are consumed synchronously later, and
    # a flashed message that is never fetched shows up on signup.html instead.
    # To reproduce: delete a word, log out, open the sign-up page -- the message appears.
    # flash(f'{word} is no longer in your word list.')
    return "success"
@ -109,11 +112,11 @@ def deleteword(username, word):
@userService.route("/<username>/userpage", methods=['GET', 'POST'])
def userpage(username):
'''
"""
用户界面
:param username: 用户名
:return: 返回用户界面
'''
"""
# 未登录,跳转到未登录界面
if not session.get('logged_in'):
return render_template('not_login.html')
@ -136,7 +139,7 @@ def userpage(username):
elif request.method == 'GET': # when we load a html page
d = load_freq_history(user_freq_record)
lst = pickle_idea2.dict2lst(d)
lst = pickle_idea2.dict_to_lst(d)
lst2 = []
for t in lst:
lst2.append((t[0], len(t[1])))
@ -159,19 +162,20 @@ def userpage(username):
yml=Yaml.yml,
words=words)
@userService.route("/<username>/mark", methods=['GET', 'POST'])
def user_mark_word(username):
'''
"""
标记单词
:param username: 用户名
:return: 重定位到用户界面
'''
"""
username = session[username]
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username)
user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % username
if request.method == 'POST':
# 提交标记的单词
d = load_freq_history(user_freq_record)
lst_history = pickle_idea2.dict2lst(d)
lst_history = pickle_idea2.dict_to_lst(d)
lst = []
for word in request.form.getlist('marked'):
lst.append((word, [get_time()]))
@ -181,10 +185,11 @@ def user_mark_word(username):
else:
return 'Under construction'
def get_time():
    """
    Return the current local time as text.

    :return: the current time formatted as 'YYYYmmddHHMM', i.e. precise to the minute
    """
    return datetime.now().strftime('%Y%m%d%H%M')

View File

@ -6,74 +6,74 @@
import collections
import string
import operator
import os, sys # 引入模块sys因为我要用里面的sys.argv列表中的信息来读取命令行参数。
import os, sys # 引入模块sys因为我要用里面的sys.argv列表中的信息来读取命令行参数。
import pickle_idea
def freq(s):
    """
    Count how often each word occurs in a string.

    The string is lower-cased first, so 'Apple' and 'apple' are counted as the
    same word; words are separated by whitespace.

    :param s: text to analyse
    :return: list of (word, frequency) tuples, most frequent first,
        e.g. [('apple', 2), ('banana', 1)]
    """
    words = s.lower().split()
    return collections.Counter(words).most_common()
def youdao_link(word):
    """
    Build the Youdao dictionary URL for a word.

    :param word: word to look up; it is inserted into the URL unchanged
    :return: the URL as a string
    """
    return f'http://youdao.com/w/eng/{word}/#keyfrom=dict2.index'
def file_to_str(f_name):
    """
    Read a whole text file into a string.

    :param f_name: path of the file; it is opened in text mode with the default encoding
    :return: the complete file content
    """
    # The with-statement closes the file even when read() raises.
    with open(f_name) as f:
        return f.read()
def remove_punctuation(s):
    """
    Replace punctuation in a text with spaces and drop stray apostrophes.

    Every character of the special-character set becomes a space (not an empty
    string, so 'apple,apple' turns into 'apple apple' rather than 'appleapple'),
    then leading and trailing whitespace is stripped. An apostrophe is kept only
    when it has an English letter directly before and after it, as in "don't";
    any other apostrophe is removed.

    :param s: text to clean
    :return: the cleaned text
    """
    # The backslash is escaped explicitly; the set is a backslash, an underscore
    # and the remaining symbols.
    special_characters = '\\_©~<=>+-/[]*&$%^@.,?!:;#()"“”—‘’{}|'
    # One translate() pass replaces all of them; '-' is in the set, so no '--'
    # can be left over afterwards.
    s = s.translate(str.maketrans(dict.fromkeys(special_characters, ' ')))
    s = s.strip()
    if '\'' not in s:
        return s
    last = len(s) - 1
    kept = []  # characters that stay in the result
    for i, ch in enumerate(s):
        if ch == '\'':
            has_neighbours = 0 < i < last
            if not (has_neighbours
                    and s[i - 1] in string.ascii_letters
                    and s[i + 1] in string.ascii_letters):
                continue  # apostrophe is not inside a word: drop it
        kept.append(ch)
    return ''.join(kept)
def sort_in_descending_order(lst):
    """
    Sort (word, frequency) pairs by frequency, highest first.

    :param lst: iterable of items whose first element is the word and whose second
        element is its frequency
    :return: a new list; items with equal frequency are ordered by word, descending
    """
    return sorted(lst, key=operator.itemgetter(1, 0), reverse=True)
def sort_in_ascending_order(lst):
    """
    Sort (word, frequency) pairs by frequency, lowest first.

    :param lst: iterable of items whose first element is the word and whose second
        element is its frequency
    :return: a new list; items with equal frequency are ordered by word, ascending
    """
    return sorted(lst, key=operator.itemgetter(1, 0))
def make_html_page(lst, fname): # 只是在wordfreqCMD.py中的main函数中调用所以不做修改
'''
"""
功能把lst的信息存到fname中以html格式
'''
"""
s = ''
count = 1
for x in lst:
@ -89,22 +89,22 @@ def make_html_page(lst, fname): # 只是在wordfreqCMD.py中的main函数中调
if __name__ == '__main__':
num = len(sys.argv)
if num == 1: # 从键盘读入字符串
if num == 1: # 从键盘读入字符串
s = input()
elif num == 2: # 从文件读入字符串
elif num == 2: # 从文件读入字符串
fname = sys.argv[1]
s = file2str(fname)
s = file_to_str(fname)
else:
print('I can accept at most 2 arguments.')
sys.exit()# 结束程序运行, 下面的代码不会被执行了。
sys.exit() # 结束程序运行, 下面的代码不会被执行了。
s = remove_punctuation(s) # 这里是s是实参(argument),里面有值
s = remove_punctuation(s) # 这里是s是实参(argument),里面有值
L = freq(s)
for x in sort_in_descending_order(L):
print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出
print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0]))) # 函数导出
# 把频率的结果放result.html中
make_html_page(sort_in_descending_order(L), 'result.html')
make_html_page(sort_in_descending_order(L), 'result.html')
print('\nHistory:\n')
if os.path.exists('frequency.p'):
@ -112,12 +112,9 @@ if __name__ == '__main__':
else:
d = {}
print(sort_in_descending_order(pickle_idea.dict2lst(d)))
print(sort_in_descending_order(pickle_idea.dict_to_lst(d)))
# 合并频率
lst_history = pickle_idea.dict2lst(d)
lst_history = pickle_idea.dict_to_lst(d)
d = pickle_idea.merge_frequency(L, lst_history)
pickle_idea.save_frequency_to_pickle(d, 'frequency.p')

View File

@ -1,4 +1,4 @@
Flask==1.1.2
Flask==2.2.3
selenium==3.141.0
PyYAML~=6.0
pony==0.7.16