From 253a32c774e23afe4cbc7b98759f118c403ead1f Mon Sep 17 00:00:00 2001
From: Hui Lan <lanhui@zjnu.edu.cn>
Date: Tue, 10 Mar 2020 12:57:09 +0800
Subject: analyze.py: check if a score file contains a BOM (byte order mark)
 before proceeding

---
 analyze.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'analyze.py')

diff --git a/analyze.py b/analyze.py
index ce1e7f3..778d5d2 100644
--- a/analyze.py
+++ b/analyze.py
@@ -43,6 +43,7 @@ import json, os, sys
 
 # Solve UnicodeDecodeError - https://blog.csdn.net/blmoistawinde/article/details/87717065
 import _locale
+import codecs
 _locale._getdefaultlocale = (lambda *args: ['zh_CN', 'utf8'])
 
 
@@ -54,6 +55,7 @@ def get_task_information(fname):
 
 def get_student_information(fname):
     result = []
+
     with open(fname) as f:
         for line in f:
             line = line.strip()
@@ -78,6 +80,14 @@ def get_max_score(d):
 
 def get_student_number(fname):
     d = {}
+
+    # If the file starts with a UTF-8 BOM (byte order mark), report it and stop.
+    with open(fname, 'r+b') as f:
+        s = f.read()
+        if s.startswith(codecs.BOM_UTF8):
+            print('\nERROR: The file %s contains BOM character.  Remove that first.' % (fname))
+            sys.exit()
+    
     f = open(fname)
     for line in f:
         line = line.strip()
-- 
cgit v1.2.1