From 77858a21a479ce9043f7425f9a4cbf5eb87c1015 Mon Sep 17 00:00:00 2001 From: Hui Lan Date: Sun, 14 Apr 2019 20:56:43 +0800 Subject: =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=96=87=E4=BB=B6=E4=B8=80=E7=AB=A0?= =?UTF-8?q?=EF=BC=8C=E4=B8=BB=E8=A6=81=E6=98=AF=E5=AE=9E=E9=AA=8C=E7=AD=94?= =?UTF-8?q?=E6=A1=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- LectureNotesOnPython.rst | 106 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 1 deletion(-) (limited to 'LectureNotesOnPython.rst') diff --git a/LectureNotesOnPython.rst b/LectureNotesOnPython.rst index a729f22..216c148 100644 --- a/LectureNotesOnPython.rst +++ b/LectureNotesOnPython.rst @@ -1071,6 +1071,104 @@ key与value互换 +文件 +------------------------------------------------ + +信息多存储在文件中。所以文件的读写是最最常见的操作。 本节主要考虑纯文本文件。 以下后缀结尾的文件一般都是纯文本文件: txt, csv, html, rst, md。 + +实验: 读取纽约新生儿的名字统计文件 PopularBabyNames_ 。 + 写命令行程序 lookupname.py 。给定性别与种族,输出最流行的头几个名字。 + 命令行例子: ``python lookupname.py girl white top5`` 。 这个命令输出最流行的5个白人女孩的名字。 + 第一个参数可以是 ``girl/boy`` , 第二个参数可以是 ``asian/white/black/hispanic`` 。第三个参数以 ``top`` 开始,默认是 1。 + +.. _PopularBabyNames: https://data.cityofnewyork.us/api/views/25th-nujf/rows.csv?accessType=DOWNLOAD + +.. code:: python + + # Copyright (C) 2019 Hui Lan + # lanhui AT zjnu.edu.cn + # Purpose: 1. Introduce command line argument parsing. 2. Introduce nested dictionaries. 
# Usage:
#   python lookupname.py asian boy top10
#   python lookupname.py white girl top5
#   python lookupname.py girl white top

import csv
import operator
import sys


# Raw gender/ethnicity labels as they appear in the CSV (both the full and the
# truncated spellings) mapped to the keywords accepted on the command line.
_LABELS = {
    'FEMALE': 'girl',
    'MALE': 'boy',
    'ASIAN AND PACIFIC ISLANDER': 'asian',
    'ASIAN AND PACI': 'asian',
    'BLACK NON HISPANIC': 'black',
    'BLACK NON HISP': 'black',
    'HISPANIC': 'hispanic',
    'WHITE NON HISPANIC': 'white',
    'WHITE NON HISP': 'white',
}


def map(x):
    ''' Translate a raw CSV gender/ethnicity label into its command-line keyword.

    NOTE: this name shadows the builtin ``map`` inside this module; it is kept
    unchanged so that existing callers keep working.

    Raises KeyError if x is not one of the known labels.
    '''
    return _LABELS[x]


def file2dict(fname):
    ''' Read the baby-name CSV file fname into a nested dictionary.

    The result has the shape d[gender][ethnicity][firstname] = count, e.g.
    {'girl': {'asian': {'Emma': 42}}}.  Column 1 of each row is the gender,
    column 2 the ethnicity, column 3 the first name and column 4 the count.
    The first row is treated as a header and skipped, blank rows are ignored,
    and counts of repeated (gender, ethnicity, name) rows are added up.

    Raises KeyError for an unknown gender/ethnicity label, ValueError for a
    non-integer count and IndexError for a row with fewer than five columns.
    '''
    d = {}
    # 'with' guarantees the file is closed even when a malformed row raises.
    # NOTE(review): assumes the CSV is UTF-8 (ASCII-compatible) -- confirm.
    with open(fname, newline='', encoding='utf-8') as f:
        rows = csv.reader(f)
        next(rows, None)  # skip the header row
        for row in rows:
            if not any(field.strip() for field in row):
                continue  # blank line, e.g. a trailing newline at end of file
            gender = map(row[1])
            ethnicity = map(row[2])
            firstname = row[3].title()  # the same name may appear in different cases
            count = int(row[4])
            names = d.setdefault(gender, {}).setdefault(ethnicity, {})
            names[firstname] = names.get(firstname, 0) + count
    return d


def get_commandline_parameter(lst):
    ''' Parse the command-line words in lst into a dictionary.

    Returns {'gender': ..., 'ethnicity': ..., 'top': ...}.  The words may come
    in any order and are matched case-insensitively.  'gender' and 'ethnicity'
    stay '' when not given; 'top' defaults to 1 and is set by a word of the
    form topN (a bare 'top' leaves it untouched).

    Raises ValueError (a subclass of Exception) for any word that is not
    recognised, including topN with a non-integer or negative N.
    '''
    d = {'gender': '', 'ethnicity': '', 'top': 1}
    for x in lst:
        o = x.lower()
        if o in ('asian', 'black', 'white', 'hispanic'):
            d['ethnicity'] = o
        elif o in ('girl', 'boy'):
            d['gender'] = o
        elif o.startswith('top'):
            # startswith, not 'in': words such as 'stop5' must not be accepted.
            suffix = o[3:]
            if suffix:
                try:
                    n = int(suffix)
                except ValueError:
                    raise ValueError('Not recognised option %s' % (x))
                if n < 0:
                    raise ValueError('Not recognised option %s' % (x))
                d['top'] = n
        else:
            raise ValueError('Not recognised option %s' % (x))
    return d


def sort_by_value(d):
    ''' Return a sorted list of tuples, each tuple containing a key and a value.

    Note that the tuples are ordered in descending order of the value.  Keys
    with equal values keep the order in which they appear in d.
    '''
    return sorted(d.items(), key=operator.itemgetter(1), reverse=True)


def top_names(d, gender, ethnicity, top):
    ''' Return the most popular first names for gender and ethnicity.

    d is a nested dictionary as returned by file2dict.  At most top names are
    returned, most popular first; fewer are returned when fewer are available.

    Raises KeyError if d has no entry for the gender/ethnicity combination.
    '''
    ranked = sort_by_value(d[gender][ethnicity])
    # Slicing (unlike indexing) is safe when top exceeds the number of names.
    return [name for name, _ in ranked[:max(top, 0)]]


def main(argv):
    ''' Run the lookup for the command-line words in argv and print the names.'''
    args = get_commandline_parameter(argv)
    if not args['gender'] or not args['ethnicity']:
        sys.exit('Usage: python lookupname.py girl|boy asian|white|black|hispanic [topN]')
    d = file2dict('Popular_Baby_Names.csv')
    try:
        names = top_names(d, args['gender'], args['ethnicity'], args['top'])
    except KeyError:
        sys.exit('No data for %s %s' % (args['ethnicity'], args['gender']))
    for name in names:
        print(name)


if __name__ == '__main__':
    main(sys.argv[1:])