diff --git a/app/Article.py b/app/Article.py index df9ac3a..d5e63cd 100644 --- a/app/Article.py +++ b/app/Article.py @@ -22,12 +22,12 @@ def total_number_of_essays(): return len(result) -def get_article_title(s): - return s.split('\n')[0] +def get_article_title(article): + return article.split('\n')[0] -def get_article_body(s): - lst = s.split('\n') +def get_article_body(article): + lst = article.split('\n') lst.pop(0) # remove the first line return '\n'.join(lst) @@ -111,11 +111,11 @@ def within_range(x, y, r): return x > y and abs(x - y) <= r -def get_question_part(s): - s = s.strip() +def get_question_part(article): + article = article.strip() result = [] flag = 0 - for line in s.split('\n'): + for line in article.split('\n'): line = line.strip() if line == 'QUESTION': result.append(line) @@ -127,11 +127,11 @@ def get_question_part(s): return '\n'.join(result) -def get_answer_part(s): - s = s.strip() +def get_answer_part(article): + article = article.strip() result = [] flag = 0 - for line in s.split('\n'): + for line in article.split('\n'): line = line.strip() if line == 'ANSWER': flag = 1 diff --git a/app/Login.py b/app/Login.py index cd750d1..d3d609e 100644 --- a/app/Login.py +++ b/app/Login.py @@ -3,23 +3,26 @@ import string from datetime import datetime, timedelta from UseSqlite import InsertQuery, RecordQuery + def md5(s): - ''' + """ MD5摘要 :param str: 字符串 :return: 经MD5以后的字符串 - ''' + """ h = hashlib.md5(s.encode(encoding='utf-8')) return h.hexdigest() + # import model.user after the defination of md5(s) to avoid circular import from model.user import get_user_by_username, insert_user, update_password_by_username path_prefix = '/var/www/wordfreq/wordfreq/' path_prefix = './' # comment this line in deployment -def verify_pass(newpass,oldpass): - if(newpass==oldpass): + +def verify_pass(new_password, old_password): + if new_password == old_password: return True @@ -43,17 +46,17 @@ def check_username_availability(username): def change_password(username, old_password, new_password): - ''' + """ 修改密码 :param username: 用户名 :param old_password: 旧的密码 :param new_password: 新密码 :return: 修改成功:True 否则:False - ''' + """ if not verify_user(username, old_password): # 旧密码错误 return False # 将用户名和密码一起加密,以免暴露不同用户的相同密码 - if verify_pass(new_password,old_password): #新旧密码一致 + if verify_pass(new_password, old_password): # 新旧密码一致 return False update_password_by_username(username, new_password) return True @@ -66,6 +69,7 @@ def get_expiry_date(username): else: return user.expiry_date + class UserName: def __init__(self, username): self.username = username @@ -73,11 +77,11 @@ class UserName: def validate(self): if len(self.username) > 20: return f'{self.username} is too long. The user name cannot exceed 20 characters.' - if self.username.startswith('.'): # a user name must not start with a dot + if self.username.startswith('.'): # a user name must not start with a dot return 'Period (.) is not allowed as the first letter in the user name.' - if ' ' in self.username: # a user name must not include a whitespace + if ' ' in self.username: # a user name must not include a whitespace return 'Whitespace is not allowed in the user name.' - for c in self.username: # a user name must not include special characters, except non-leading periods or underscores + for c in self.username: # a user name must not include special characters, except non-leading periods or underscores if c in string.punctuation and c != '.' and c != '_': return f'{c} is not allowed in the user name.' if self.username in ['signup', 'login', 'logout', 'reset', 'mark', 'back', 'unfamiliar', 'familiar', 'del', 'admin']: diff --git a/app/UseSqlite.py b/app/UseSqlite.py index ea4baeb..90d5dae 100644 --- a/app/UseSqlite.py +++ b/app/UseSqlite.py @@ -9,6 +9,7 @@ import sqlite3 + class Sqlite3Template: def __init__(self, db_fname): self.db_fname = db_fname @@ -72,7 +73,6 @@ class RecordQuery(Sqlite3Template): return result - if __name__ == '__main__': #iq = InsertQuery('RiskDB.db') diff --git a/app/WordFreq.py b/app/WordFreq.py index 3620a41..ffdc246 100644 --- a/app/WordFreq.py +++ b/app/WordFreq.py @@ -6,6 +6,7 @@ from wordfreqCMD import remove_punctuation, freq, sort_in_descending_order import string + class WordFreq: def __init__(self, s): self.s = remove_punctuation(s) diff --git a/app/Yaml.py b/app/Yaml.py index 00974aa..01fb184 100644 --- a/app/Yaml.py +++ b/app/Yaml.py @@ -1,10 +1,10 @@ -''' +""" Yaml.py 配置文件包括: ./static/config.yml ./layout/partial/header.html ./layout/partial/footer.html -''' +""" import yaml as YAML import os @@ -15,7 +15,7 @@ ymlPath = path_prefix + 'static/config.yml' # partial文件夹路径 partialPath = path_prefix + 'layout/partial/' -f = open(ymlPath, 'r', encoding='utf-8') # 以'UTF-8'格式打开YAML文件 +f = open(ymlPath, 'r', encoding='utf-8') # 以'UTF-8'格式打开YAML文件 cont = f.read() # 以文本形式读取YAML yml = YAML.load(cont, Loader=YAML.FullLoader) # 加载YAML diff --git a/app/account_service.py b/app/account_service.py index a7ed0c4..eb396ca 100644 --- a/app/account_service.py +++ b/app/account_service.py @@ -8,10 +8,10 @@ accountService = Blueprint("accountService", __name__) ### Sign-up, login, logout ### @accountService.route("/signup", methods=['GET', 'POST']) def signup(): - ''' + """ 注册 :return: 根据注册是否成功返回不同界面 - ''' + """ if request.method == 'GET': # GET方法直接返回注册页面 return render_template('signup.html') @@ -19,12 +19,12 @@ def signup(): # POST方法需判断是否注册成功,再根据结果返回不同的内容 username = escape(request.form['username']) password = escape(request.form['password']) - - #! 添加如下代码为了过滤注册时的非法字符 + + # ! 添加如下代码为了过滤注册时的非法字符 warn = WarningMessage(username) if str(warn) != 'OK': return jsonify({'status': '3', 'warn': str(warn)}) - + available = check_username_availability(username) if not available: # 用户名不可用 return jsonify({'status': '0'}) @@ -43,13 +43,12 @@ def signup(): return jsonify({'status': '1'}) - @accountService.route("/login", methods=['GET', 'POST']) def login(): - ''' + """ 登录 :return: 根据登录是否成功返回不同页面 - ''' + """ if request.method == 'GET': # GET请求 return render_template('login.html') @@ -74,10 +73,10 @@ def login(): @accountService.route("/logout", methods=['GET', 'POST']) def logout(): - ''' + """ 登出 :return: 重定位到主界面 - ''' + """ # 将session标记为登出状态 session['logged_in'] = False return redirect(url_for('mainpage')) @@ -85,10 +84,10 @@ def logout(): @accountService.route("/reset", methods=['GET', 'POST']) def reset(): - ''' + """ 重设密码 :return: 返回适当的页面 - ''' + """ # 下列方法用于防止未登录状态下的修改密码 if not session.get('logged_in'): return render_template('login.html') @@ -102,9 +101,9 @@ def reset(): # POST请求用于提交修改后信息 old_password = escape(request.form['old-password']) new_password = escape(request.form['new-password']) - flag = change_password(username, old_password, new_password) # flag表示是否修改成功 + flag = change_password(username, old_password, new_password) # flag表示是否修改成功 if flag: session['logged_in'] = False - return jsonify({'status':'1'}) # 修改成功 + return jsonify({'status': '1'}) # 修改成功 else: - return jsonify({'status':'2'}) # 修改失败 + return jsonify({'status': '2'}) # 修改失败 diff --git a/app/admin_service.py b/app/admin_service.py index a604b5e..959db6b 100644 --- a/app/admin_service.py +++ b/app/admin_service.py @@ -52,7 +52,7 @@ def article(): max(1, int(request.args.get("page", 1))), _article_number // _page_size + (_article_number % _page_size > 0) ) # 最小的page是1 except ValueError: - return "page parmas must be int!" + return "page params must be int!" _articles = get_page_articles(_cur_page, _page_size) for article in _articles: # 获取每篇文章的title diff --git a/app/difficulty.py b/app/difficulty.py index cb93768..d500cca 100644 --- a/app/difficulty.py +++ b/app/difficulty.py @@ -18,25 +18,25 @@ def load_record(pickle_fname): return d -def convert_test_type_to_difficulty_level(d): +def convert_test_type_to_difficulty_level(words_dict): """ 对原本的单词库中的单词进行难度评级 - :param d: 存储了单词库pickle文件中的单词的字典 + :param words_dict: 存储了单词库pickle文件中的单词的字典 :return: """ result = {} - L = list(d.keys()) # in d, we have test types (e.g., CET4,CET6,BBC) for each word + words_lst = list(words_dict.keys()) # in words_dict, we have test types (e.g., CET4,CET6,BBC) for each word - for k in L: - if 'CET4' in d[k]: + for k in words_lst: + if 'CET4' in words_dict[k]: result[k] = 4 # CET4 word has level 4 - elif 'OXFORD3000' in d[k]: + elif 'OXFORD3000' in words_dict[k]: result[k] = 5 - elif 'CET6' in d[k] or 'GRADUATE' in d[k]: + elif 'CET6' in words_dict[k] or 'GRADUATE' in words_dict[k]: result[k] = 6 - elif 'OXFORD5000' in d[k] or 'IELTS' in d[k]: + elif 'OXFORD5000' in words_dict[k] or 'IELTS' in words_dict[k]: result[k] = 7 - elif 'BBC' in d[k]: + elif 'BBC' in words_dict[k]: result[k] = 8 return result # {'apple': 4, ...} @@ -65,10 +65,10 @@ def get_difficulty_level_for_user(d1, d2): def revert_dict(d): - ''' + """ In d, word is the key, and value is a list of dates. In d2 (the returned value of this function), time is the key, and the value is a list of words picked at that time. - ''' + """ d2 = {} for k in d: if type(d[k]) is list: # d[k] is a list of dates. @@ -80,7 +80,7 @@ def revert_dict(d): for time_info in lst: date = time_info[:10] # until hour - if not date in d2: + if date not in d2: d2[date] = [k] else: d2[date].append(k) @@ -105,7 +105,7 @@ def user_difficulty_level(d_user, d): word = t[0] hard = t[1] # print('WORD %s HARD %4.2f' % (word, hard)) - geometric = geometric * (hard) + geometric = geometric * hard count += 1 if count >= 10: return geometric ** (1 / count) @@ -131,7 +131,7 @@ def text_difficulty_level(s, d): for t in lst2: word = t[0] hard = t[1] - geometric = geometric * (hard) + geometric = geometric * hard count += 1 if count >= 20: # we look for n most difficult words return geometric ** (1 / count) diff --git a/app/main.py b/app/main.py index 4e3f829..2267dce 100644 --- a/app/main.py +++ b/app/main.py @@ -23,33 +23,34 @@ app.register_blueprint(adminService) path_prefix = '/var/www/wordfreq/wordfreq/' path_prefix = './' # comment this line in deployment + def get_random_image(path): - ''' + """ 返回随机图 :param path: 图片文件(JPEG格式),不包含后缀名 :return: - ''' + """ img_path = random.choice(glob.glob(os.path.join(path, '*.jpg'))) return img_path[img_path.rfind('/static'):] def get_random_ads(): - ''' + """ 返回随机广告 :return: 一个广告(包含HTML标签) - ''' + """ return random.choice(['个性化分析精准提升', '你的专有单词本', '智能捕捉阅读弱点,针对性提高你的阅读水平']) def appears_in_test(word, d): - ''' + """ 如果字符串里没有指定的单词,则返回逗号加单词 :param word: 指定单词 :param d: 字符串 :return: 逗号加单词 - ''' - if not word in d: + """ + if word not in d: return '' else: return ','.join(d[word]) @@ -57,36 +58,36 @@ def appears_in_test(word, d): @app.route("/mark", methods=['GET', 'POST']) def mark_word(): - ''' + """ 标记单词 :return: 重定位到主界面 - ''' + """ if request.method == 'POST': d = load_freq_history(path_prefix + 'static/frequency/frequency.p') - lst_history = pickle_idea.dict2lst(d) + lst_history = pickle_idea.dict_to_lst(d) lst = [] for word in request.form.getlist('marked'): lst.append((word, 1)) d = pickle_idea.merge_frequency(lst, lst_history) pickle_idea.save_frequency_to_pickle(d, path_prefix + 'static/frequency/frequency.p') return redirect(url_for('mainpage')) - else: # 不回应GET请求 + else: # 不回应GET请求 return 'Under construction' @app.route("/", methods=['GET', 'POST']) def mainpage(): - ''' + """ 根据GET或POST方法来返回不同的主界面 :return: 主界面 - ''' + """ if request.method == 'POST': # when we submit a form content = escape(request.form['content']) f = WordFreq(content) lst = f.get_freq() # save history d = load_freq_history(path_prefix + 'static/frequency/frequency.p') - lst_history = pickle_idea.dict2lst(d) + lst_history = pickle_idea.dict_to_lst(d) d = pickle_idea.merge_frequency(lst, lst_history) pickle_idea.save_frequency_to_pickle(d, path_prefix + 'static/frequency/frequency.p') return render_template('mainpage_post.html', lst=lst, yml=Yaml.yml) @@ -96,8 +97,8 @@ def mainpage(): number_of_essays = total_number_of_essays() d = load_freq_history(path_prefix + 'static/frequency/frequency.p') d_len = len(d) - lst = sort_in_descending_order(pickle_idea.dict2lst(d)) - return render_template('mainpage_get.html', + lst = sort_in_descending_order(pickle_idea.dict_to_lst(d)) + return render_template('mainpage_get.html', admin_name=ADMIN_NAME, random_ads=random_ads, d_len=d_len, diff --git a/app/pickle_idea.py b/app/pickle_idea.py index 45bd19a..3e87796 100644 --- a/app/pickle_idea.py +++ b/app/pickle_idea.py @@ -10,29 +10,29 @@ import pickle from datetime import datetime -def lst2dict(lst, d): - ''' +def lst_to_dict(lst, d): + """ Store the information in list lst to dictionary d. Note: nothing is returned. - ''' + """ for x in lst: word = x[0] freq = x[1] - if not word in d: + if word not in d: d[word] = freq else: d[word] += freq -def dict2lst(d): - return list(d.items()) # a list of (key, value) pairs +def dict_to_lst(d): + return list(d.items()) # a list of (key, value) pairs -def merge_frequency(lst1, lst2): +def merge_frequency(list1, list2): d = {} - lst2dict(lst1, d) - lst2dict(lst2, d) + lst_to_dict(list1, d) + lst_to_dict(list2, d) return d @@ -54,33 +54,35 @@ def save_frequency_to_pickle(d, pickle_fname): pickle.dump(d2, f) f.close() -def unfamiliar(path,word): - f = open(path,"rb") + +def unfamiliar(path, word): + f = open(path, "rb") dic = pickle.load(f) dic[word] += [datetime.now().strftime('%Y%m%d%H%M')] - fp = open(path,"wb") - pickle.dump(dic,fp) + fp = open(path, "wb") + pickle.dump(dic, fp) -def familiar(path,word): - f = open(path,"rb") + +def familiar(path, word): + f = open(path, "rb") dic = pickle.load(f) - if len(dic[word])>1: + if len(dic[word]) > 1: del dic[word][0] else: dic.pop(word) - fp = open(path,"wb") - pickle.dump(dic,fp) + fp = open(path, "wb") + pickle.dump(dic, fp) + if __name__ == '__main__': - lst1 = [('apple',2), ('banana',1)] + lst1 = [('apple', 2), ('banana', 1)] d = {} - lst2dict(lst1, d) # d will change - save_frequency_to_pickle(d, 'frequency.p') # frequency.p is our database + lst_to_dict(lst1, d) # d will change + save_frequency_to_pickle(d, 'frequency.p') # frequency.p is our database - - lst2 = [('banana',2), ('orange', 4)] + lst2 = [('banana', 2), ('orange', 4)] d = load_record('frequency.p') - lst1 = dict2lst(d) + lst1 = dict_to_lst(d) d = merge_frequency(lst2, lst1) print(d) diff --git a/app/pickle_idea2.py b/app/pickle_idea2.py index 0da55bc..3771bae 100644 --- a/app/pickle_idea2.py +++ b/app/pickle_idea2.py @@ -11,21 +11,23 @@ import pickle from datetime import datetime -def lst2dict(lst, d): - ''' + +def lst_to_dict(lst, d): + """ Store the information in list lst to dictionary d. Note: nothing is returned. - ''' + """ for x in lst: word = x[0] dates = x[1] - if not word in d: + if word not in d: d[word] = dates else: d[word] += dates -def deleteRecord(path,word): + +def delete_record(path, word): with open(path, 'rb') as f: db = pickle.load(f) try: @@ -33,9 +35,10 @@ def deleteRecord(path,word): except KeyError: print("sorry") with open(path, 'wb') as ff: - pickle.dump(db, ff) + pickle.dump(db, ff) -def dict2lst(d): + +def dict_to_lst(d): if len(d) > 0: keys = list(d.keys()) if isinstance(d[keys[0]], int): @@ -44,14 +47,15 @@ def dict2lst(d): lst.append((k, [datetime.now().strftime('%Y%m%d%H%M')])) return lst elif isinstance(d[keys[0]], list): - return list(d.items()) # a list of (key, value) pairs + return list(d.items()) # a list of (key, value) pairs return [] + def merge_frequency(lst1, lst2): d = {} - lst2dict(lst1, d) - lst2dict(lst2, d) + lst_to_dict(lst1, d) + lst_to_dict(lst2, d) return d @@ -67,23 +71,22 @@ def save_frequency_to_pickle(d, pickle_fname): exclusion_lst = ['one', 'no', 'has', 'had', 'do', 'that', 'have', 'by', 'not', 'but', 'we', 'this', 'my', 'him', 'so', 'or', 'as', 'are', 'it', 'from', 'with', 'be', 'can', 'for', 'an', 'if', 'who', 'whom', 'whose', 'which', 'the', 'to', 'a', 'of', 'and', 'you', 'i', 'he', 'she', 'they', 'me', 'was', 'were', 'is', 'in', 'at', 'on', 'their', 'his', 'her', 's', 'said', 'all', 'did', 'been', 'w'] d2 = {} for k in d: - if not k in exclusion_lst and not k.isnumeric() and not len(k) < 2: - d2[k] = list(sorted(d[k])) # 原先这里是d2[k] = list(sorted(set(d[k]))) + if k not in exclusion_lst and not k.isnumeric() and not len(k) < 2: + d2[k] = list(sorted(d[k])) # 原先这里是d2[k] = list(sorted(set(d[k]))) pickle.dump(d2, f) f.close() - if __name__ == '__main__': - lst1 = [('apple',['201910251437', '201910251438']), ('banana',['201910251439'])] + lst1 = [('apple',['201910251437', '201910251438']), ('banana', ['201910251439'])] d = {} - lst2dict(lst1, d) # d will change + lst_to_dict(lst1, d) # d will change save_frequency_to_pickle(d, 'frequency.p') # frequency.p is our database lst2 = [('banana',['201910251439']), ('orange', ['201910251440', '201910251439'])] d = load_record('frequency.p') - lst1 = dict2lst(d) + lst1 = dict_to_lst(d) d = merge_frequency(lst2, lst1) print(d) diff --git a/app/user_service.py b/app/user_service.py index 2e5feed..a480247 100644 --- a/app/user_service.py +++ b/app/user_service.py @@ -21,6 +21,7 @@ userService = Blueprint("user_bp", __name__) path_prefix = '/var/www/wordfreq/wordfreq/' path_prefix = './' # comment this line in deployment + @userService.route("/get_next_article/",methods=['GET','POST']) def get_next_article(username): user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username) @@ -42,13 +43,14 @@ def get_next_article(username): return 'Under construction' return json.dumps(data) + @userService.route("/get_pre_article/",methods=['GET']) def get_pre_article(username): user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username) if request.method == 'GET': visited_articles = session.get("visited_articles") - if(visited_articles["index"]==0): - data='' + if visited_articles["index"] == 0: + data = '' else: visited_articles["index"] -= 1 # 上一篇,index-=1 if visited_articles['article_ids'][-1] == "null": # 如果当前还是“null”,则将“null”pop出来 @@ -58,19 +60,20 @@ def get_pre_article(username): data = { 'visited_articles': visited_articles, 'today_article': today_article, - 'result_of_generate_article':result_of_generate_article + 'result_of_generate_article': result_of_generate_article } return json.dumps(data) + @userService.route("///unfamiliar", methods=['GET', 'POST']) def unfamiliar(username, word): - ''' + """ :param username: :param word: :return: - ''' - user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username) + """ + user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % username pickle_idea.unfamiliar(user_freq_record, word) session['thisWord'] = word # 1. put a word into session session['time'] = 1 @@ -79,13 +82,13 @@ def unfamiliar(username, word): @userService.route("///familiar", methods=['GET', 'POST']) def familiar(username, word): - ''' + """ :param username: :param word: :return: - ''' - user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username) + """ + user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % username pickle_idea.familiar(user_freq_record, word) session['thisWord'] = word # 1. put a word into session session['time'] = 1 @@ -93,15 +96,15 @@ def familiar(username, word): @userService.route("///del", methods=['GET', 'POST']) -def deleteword(username, word): - ''' +def delete_word(username, word): + """ 删除单词 :param username: 用户名 :param word: 单词 :return: 重定位到用户界面 - ''' + """ user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username) - pickle_idea2.deleteRecord(user_freq_record, word) + pickle_idea2.delete_record(user_freq_record, word) # 模板userpage_get.html中删除单词是异步执行,而flash的信息后续是同步执行的,所以注释这段代码;同时如果这里使用flash但不提取信息,则会影响 signup.html的显示。bug复现:删除单词后,点击退出,点击注册,注册页面就会出现提示信息 # flash(f'{word} is no longer in your word list.') return "success" @@ -109,11 +112,11 @@ def deleteword(username, word): @userService.route("//userpage", methods=['GET', 'POST']) def userpage(username): - ''' + """ 用户界面 :param username: 用户名 :return: 返回用户界面 - ''' + """ # 未登录,跳转到未登录界面 if not session.get('logged_in'): return render_template('not_login.html') @@ -136,7 +139,7 @@ def userpage(username): elif request.method == 'GET': # when we load a html page d = load_freq_history(user_freq_record) - lst = pickle_idea2.dict2lst(d) + lst = pickle_idea2.dict_to_lst(d) lst2 = [] for t in lst: lst2.append((t[0], len(t[1]))) @@ -159,19 +162,20 @@ def userpage(username): yml=Yaml.yml, words=words) + @userService.route("//mark", methods=['GET', 'POST']) def user_mark_word(username): - ''' + """ 标记单词 :param username: 用户名 :return: 重定位到用户界面 - ''' + """ username = session[username] - user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % (username) + user_freq_record = path_prefix + 'static/frequency/' + 'frequency_%s.pickle' % username if request.method == 'POST': # 提交标记的单词 d = load_freq_history(user_freq_record) - lst_history = pickle_idea2.dict2lst(d) + lst_history = pickle_idea2.dict_to_lst(d) lst = [] for word in request.form.getlist('marked'): lst.append((word, [get_time()])) @@ -181,10 +185,11 @@ def user_mark_word(username): else: return 'Under construction' + def get_time(): - ''' + """ 获取当前时间 :return: 当前时间 - ''' + """ return datetime.now().strftime('%Y%m%d%H%M') # upper to minutes diff --git a/app/wordfreqCMD.py b/app/wordfreqCMD.py index e56ba0c..53ea7f8 100644 --- a/app/wordfreqCMD.py +++ b/app/wordfreqCMD.py @@ -6,74 +6,74 @@ import collections import string import operator -import os, sys # 引入模块sys,因为我要用里面的sys.argv列表中的信息来读取命令行参数。 +import os, sys # 引入模块sys,因为我要用里面的sys.argv列表中的信息来读取命令行参数。 import pickle_idea -def freq(fruit): - ''' + +def freq(s): + """ 功能: 把字符串转成列表。 目的是得到每个单词的频率。 输入: 字符串 输出: 列表, 列表里包含一组元组,每个元组包含单词与单词的频率。 比如 [('apple', 2), ('banana', 1)] 注意事项: 首先要把字符串转成小写。原因是。。。 - ''' + """ result = [] - - fruit = fruit.lower() # 字母转小写 - flst = fruit.split() # 字符串转成list - c = collections.Counter(flst) + s = s.lower() # 字母转小写 + word_lst = s.split() # 字符串转成list + c = collections.Counter(word_lst) result = c.most_common() return result -def youdao_link(s): # 有道链接 - link = 'http://youdao.com/w/eng/' + s + '/#keyfrom=dict2.index'# 网址 +def youdao_link(word): # 有道链接 + link = 'http://youdao.com/w/eng/' + word + '/#keyfrom=dict2.index' # 网址 return link -def file2str(fname):#文件转字符 - f = open(fname) #打开 - s = f.read() #读取 - f.close() #关闭 - return s +def file_to_str(f_name): # 文件转字符 + f = open(f_name) # 打开 + f_str = f.read() # 读取 + f.close() # 关闭 + return f_str -def remove_punctuation(s): # 这里是s是形参 (parameter)。函数被调用时才给s赋值。 - special_characters = '\_©~<=>+-/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 +def remove_punctuation(s): # 这里是words_text是形参 (parameter)。函数被调用时才给words_text赋值。 + special_characters = '\_©~<=>+-/[]*&$%^@.,?!:;#()"“”—‘’{}|' # 把里面的字符都去掉 for c in special_characters: - s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 + s = s.replace(c, ' ') # 防止出现把 apple,apple 移掉逗号后变成 appleapple 情况 s = s.replace('--', ' ') - s = s.strip() # 去除前后的空格 - + s = s.strip() # 去除前后的空格 + if '\'' in s: n = len(s) - t = '' # 用来收集我需要保留的字符 - for i in range(n): # 只有单引号前后都有英文字符,才保留 + characters = '' # 用来收集我需要保留的字符 + for i in range(n): # 只有单引号前后都有英文字符,才保留 if s[i] == '\'': i_is_ok = i - 1 >= 0 and i + 1 < n - if i_is_ok and s[i-1] in string.ascii_letters and s[i+1] in string.ascii_letters: - t += s[i] + if i_is_ok and s[i - 1] in string.ascii_letters and s[i + 1] in string.ascii_letters: + characters += s[i] else: - t += s[i] - return t + characters += s[i] + return characters else: return s -def sort_in_descending_order(lst):# 单词按频率降序排列 +def sort_in_descending_order(lst): # 单词按频率降序排列 lst2 = sorted(lst, reverse=True, key=lambda x: (x[1], x[0])) return lst2 -def sort_in_ascending_order(lst):# 单词按频率降序排列 +def sort_in_ascending_order(lst): # 单词按频率降序排列 lst2 = sorted(lst, reverse=False, key=lambda x: (x[1], x[0])) return lst2 def make_html_page(lst, fname): # 只是在wordfreqCMD.py中的main函数中调用,所以不做修改 - ''' + """ 功能:把lst的信息存到fname中,以html格式。 - ''' + """ s = '' count = 1 for x in lst: @@ -89,22 +89,22 @@ def make_html_page(lst, fname): # 只是在wordfreqCMD.py中的main函数中调 if __name__ == '__main__': num = len(sys.argv) - if num == 1: # 从键盘读入字符串 + if num == 1: # 从键盘读入字符串 s = input() - elif num == 2: # 从文件读入字符串 + elif num == 2: # 从文件读入字符串 fname = sys.argv[1] - s = file2str(fname) + s = file_to_str(fname) else: print('I can accept at most 2 arguments.') - sys.exit()# 结束程序运行, 下面的代码不会被执行了。 + sys.exit() # 结束程序运行, 下面的代码不会被执行了。 - s = remove_punctuation(s) # 这里是s是实参(argument),里面有值 + s = remove_punctuation(s) # 这里是s是实参(argument),里面有值 L = freq(s) for x in sort_in_descending_order(L): - print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0])))#函数导出 + print('%s\t%d\t%s' % (x[0], x[1], youdao_link(x[0]))) # 函数导出 # 把频率的结果放result.html中 - make_html_page(sort_in_descending_order(L), 'result.html') + make_html_page(sort_in_descending_order(L), 'result.html') print('\nHistory:\n') if os.path.exists('frequency.p'): @@ -112,12 +112,9 @@ if __name__ == '__main__': else: d = {} - print(sort_in_descending_order(pickle_idea.dict2lst(d))) + print(sort_in_descending_order(pickle_idea.dict_to_lst(d))) # 合并频率 - lst_history = pickle_idea.dict2lst(d) + lst_history = pickle_idea.dict_to_lst(d) d = pickle_idea.merge_frequency(L, lst_history) pickle_idea.save_frequency_to_pickle(d, 'frequency.p') - - - diff --git a/requirements.txt b/requirements.txt index 338b71c..2210969 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -Flask==1.1.2 +Flask==2.2.3 selenium==3.141.0 PyYAML~=6.0 pony==0.7.16