From 77858a21a479ce9043f7425f9a4cbf5eb87c1015 Mon Sep 17 00:00:00 2001
From: Hui Lan <lanhui@zjnu.edu.cn>
Date: Sun, 14 Apr 2019 20:56:43 +0800
Subject: =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=96=87=E4=BB=B6=E4=B8=80=E7=AB=A0?=
 =?UTF-8?q?=EF=BC=8C=E4=B8=BB=E8=A6=81=E6=98=AF=E5=AE=9E=E9=AA=8C=E7=AD=94?=
 =?UTF-8?q?=E6=A1=88?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 LectureNotesOnPython.rst | 106 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 105 insertions(+), 1 deletion(-)

(limited to 'LectureNotesOnPython.rst')

diff --git a/LectureNotesOnPython.rst b/LectureNotesOnPython.rst
index a729f22..216c148 100644
--- a/LectureNotesOnPython.rst
+++ b/LectureNotesOnPython.rst
@@ -1071,6 +1071,104 @@ key与value互换
 
 
 
+文件
+------------------------------------------------
+
+信息多存储在文件中。所以文件的读写是最最常见的操作。 本节主要考虑纯文本文件。 以下后缀结尾的文件一般都是纯文本文件: txt, csv, html, rst, md。
+
+实验： 读取纽约新生儿的名字统计文件 PopularBabyNames_ 。
+      写命令行程序 lookupname.py 。给定性别与种族，输出最流行的头几个名字。
+      命令行例子： ``python lookupname.py girl white top5`` 。 这个命令输出最流行的5个白人女孩的名字。
+      第一个参数可以是 ``girl/boy`` ， 第二个参数可以是 ``asian/white/black/hispanic`` 。第三个参数以 ``top`` 开头，后接要输出的名字个数（如 ``top5`` ）；只写 ``top`` 或省略该参数时默认是 1。参数的顺序可以任意。
+
+.. _PopularBabyNames: https://data.cityofnewyork.us/api/views/25th-nujf/rows.csv?accessType=DOWNLOAD
+
+.. code:: python
+
+         # Copyright (C) 2019 Hui Lan
+         # lanhui AT zjnu.edu.cn
+         # Purpose: 1. Introduce command line argument parsing. 2. Introduce nested dictionaries. 
+         # Usage:
+         #   python lookupname.py asian boy top10
+         #   python lookupname.py white girl top5
+         #   python lookupname.py girl white top 
+         
+         
+         def map(x):
+             ''' Translate a gender or ethnicity label x (e.g., 'FEMALE') into its command line keyword (e.g., 'girl').  Raise KeyError for an unknown label.'''
+             return {'FEMALE':'girl', 'MALE':'boy', 'ASIAN AND PACIFIC ISLANDER':'asian', 'ASIAN AND PACI':'asian',
+                     'BLACK NON HISPANIC':'black', 'BLACK NON HISP':'black', 'HISPANIC':'hispanic', 'WHITE NON HISPANIC':'white', 'WHITE NON HISP':'white'}[x]
+         
+         
+         def file2dict(fname):
+             ''' Read the CSV file fname and return a nested dictionary of name counts.
+                 The result has the shape d[gender][ethnicity][firstname] = count, e.g.,
+                 d['girl'] = {'asian':{'name':count}, 'black':{}, 'white':{}, 'hispanic':{}}.
+                 Counts of a first name that appears in several rows are added up.
+                 The first (header) row and empty rows are skipped.  Raise KeyError for a
+                 gender or ethnicity label that map() does not know.'''
+             import csv # csv.reader understands quoted fields, unlike a plain split(',')
+             d = {}
+             with open(fname, newline='') as f: # closes the file even if a row is malformed
+                 reader = csv.reader(f)
+                 next(reader, None) # skip the header row
+                 for lst in reader:
+                     if not lst:
+                         continue # csv.reader yields [] for an empty line
+                     gender = map(lst[1])
+                     ethnicity = map(lst[2])
+                     firstname = lst[3].title()
+                     count = int(lst[4])
+                     # setdefault creates the inner dictionaries the first time a key is seen.
+                     names = d.setdefault(gender, {}).setdefault(ethnicity, {})
+                     names[firstname] = names.get(firstname, 0) + count
+             return d
+         
+         
+         def get_commandline_parameter(lst):
+             ''' Parse the arguments in lst (any order, any case) into a dictionary with keys 'gender', 'ethnicity'
+                 and 'top' (defaults: '', '' and 1).  Raise Exception for an argument that is not recognised.'''
+             d = {'gender':'', 'ethnicity':'', 'top':1}
+             for x in lst:
+                 o = x.lower()
+                 if o in ['asian', 'black', 'white', 'hispanic']:
+                     d['ethnicity'] = o
+                 elif o in ['girl', 'boy']:
+                     d['gender'] = o
+                 elif o.startswith('top') and o[3:].isdigit(): # e.g., top5; 'stop5' or 'topx' is rejected below
+                     d['top'] = int(o[3:])
+                 elif o != 'top': # a bare 'top' keeps the default value 1
+                     raise Exception('Not recognised option %s' % (x))
+             return d
+         
+         
+         def sort_by_value(d):
+             ''' Return the (key, value) pairs of dictionary d as a list of tuples, ordered from the
+                 largest value to the smallest.  Pairs with equal values keep the order d yields them in.'''
+             pairs = list(d.items())
+             pairs.sort(key=lambda pair: pair[1], reverse=True)
+             return pairs
+         
+         
+         import sys
+         if __name__ == '__main__':
+             args = get_commandline_parameter(sys.argv[1:])
+             if not (args['gender'] and args['ethnicity']): # both are needed to look up d[gender][ethnicity]
+                 sys.exit('Usage: python lookupname.py girl|boy asian|white|black|hispanic [topN]')
+             d = file2dict('Popular_Baby_Names.csv')
+             # .get() avoids a KeyError when the file has no rows for this gender/ethnicity.
+             d2 = d.get(args['gender'], {}).get(args['ethnicity'], {})
+             # Slicing (instead of indexing with range(top)) is safe when fewer than top names exist.
+             for name, _ in sort_by_value(d2)[:max(args['top'], 0)]:
+                 print(name)
+         
+
+
+
+
+
+
+
 排序
 ------------------------------------------------
 
@@ -1121,6 +1219,9 @@ Python自带的 ``sorted`` 可以很好满足排序需求。
 
 .. code:: python
 
+         # Copyright (C) 2019 Hui Lan
+         # lanhui AT zjnu.edu.cn
+
          def swap(L, i, j):
              L[j], L[i] = L[i], L[j]
          
@@ -1157,6 +1258,9 @@ Python自带的 ``sorted`` 可以很好满足排序需求。
 
 .. code:: python
 
+         # Copyright (C) 2019 Hui Lan
+         # lanhui AT zjnu.edu.cn
+
          def _merge(L, R):
              ''' Return a sorted list that combines the sorted list L and sorted list R.'''
              nL = len(L)
@@ -1231,7 +1335,7 @@ Python 自带的排序算法最快， ``selection_sort`` 最慢。
          result2 = selection_sort(L)
          print(time.time() - now)
          
-         assert result0== result1
+         assert result0 == result1
          assert result1 == result2
          
 
-- 
cgit v1.2.1