日本語のCSVを取り扱う際に便利なスクリプト(Python 2)
CSVのUnicode変換は手間がかかるが、変換処理をうまくクラスにまとめている人がいたので拝借した。
import csv
import sys
import tempfile
import shutil
import os

# Maps an English month name to its month number (unpadded, as a string).
calendar_dict = {
    "January": "1",
    "February": "2",
    "March": "3",
    "April": "4",
    "May": "5",
    "June": "6",
    "July": "7",
    "August": "8",
    "September": "9",
    "October": "10",
    "November": "11",
    "December": "12",
}


class UnicodeCsvReader(object):
    """csv.reader wrapper which decodes each value with designated encoding.

    Iterating yields one list of column values per CSV row.  Byte-string
    values (what csv.reader produces on Python 2) are decoded with
    ``encoding``; values that are already text are passed through.
    """

    def __init__(self, iterable, dialect='excel', encoding="utf-8",
                 *args, **kwds):
        self.reader = csv.reader(iterable, dialect=dialect, *args, **kwds)
        self.encoding = encoding
        self.dialect = self.reader.dialect
        # Mirrors csv.reader.line_num; refreshed after every row.
        self.line_num = 0

    def __iter__(self):
        return self

    def decode(self, value):
        """Return ``value`` decoded to text when it is a non-empty byte string.

        Empty values and values that are not byte strings are returned
        unchanged.
        """
        if value and isinstance(value, bytes):
            return value.decode(self.encoding)
        return value

    def __next__(self):
        """Return the next row as a list of decoded column values."""
        cols = [self.decode(x) for x in next(self.reader)]
        self.line_num = self.reader.line_num
        return cols

    # Python 2 iterator protocol.
    next = __next__


class UnicodeCsvDictReader(csv.DictReader):
    """csv.DictReader whose underlying reader is a UnicodeCsvReader."""

    def __init__(self, f, fieldnames=None, restkey=None, restval=None,
                 dialect="excel", encoding="utf-8", *args, **kwds):
        csv.DictReader.__init__(
            self, f, fieldnames, restkey, restval, dialect, *args, **kwds)
        self.encoding = encoding
        # Replace the plain reader created by DictReader, forwarding the
        # dialect and format parameters so both readers parse identically.
        self.reader = UnicodeCsvReader(f, dialect, encoding, *args, **kwds)


def _open_csv(path, encoding):
    """Open ``path`` the way the csv module expects on this Python version."""
    if sys.version_info[0] >= 3:
        # Python 3: csv wants text; let open() do the decoding.
        return open(path, 'r', encoding=encoding, newline='')
    # Python 2: csv wants bytes; UnicodeCsvReader decodes each value.
    return open(path, 'rb')


def reader_csv(input_file, e="utf-8"):
    """Yield each row of the CSV file ``input_file`` as a list of text values.

    ``e`` is the encoding of the file.
    """
    with _open_csv(input_file, e) as f:
        for row in UnicodeCsvReader(f, encoding=e):
            yield row


# All of the conversion is done here.
def writer_tsv(row):
    """Normalise ``row`` in place and return it as one TSV line.

    * ``row[0]``: when it starts with an English month name
      ("January 5, 2015") it is rewritten as "year-month-day"; anything
      after the first comma is then dropped.
    * ``row[1]``: every '+' is replaced by a space.

    Conversion is best-effort: a row that cannot be converted is reported
    on stdout and emitted with whatever was converted so far.  Every row is
    also echoed to stdout.
    """
    try:
        date = row[0].split()
        # An empty first cell has no tokens; skip the date rewrite instead
        # of failing, so the second column is still normalised.
        if date and date[0] in calendar_dict:
            row[0] = "-".join([date[2], calendar_dict[date[0]], date[1]])
        row[0] = row[0].split(",")[0]
        # NOTE(review): the first replace() is a no-op as written; the search
        # string may originally have been a different whitespace character
        # (e.g. a full-width space) -- confirm against the original source.
        row[1] = row[1].replace(u' ', u' ').replace(u'+', u' ')
    except Exception as e1:
        # Deliberately broad: one bad row must not abort the whole export.
        print("%s %s" % (e1, row))
    finally:
        print(row)
    return u'\t'.join(row) + u'\n'


def write_str_into_file(iterable, output_filename, tmp_dir='/var/tmp'):
    """Write every row of ``iterable`` to ``output_filename`` as UTF-8 TSV.

    Rows are first written to a temporary file in ``tmp_dir`` and the
    finished file is then moved to ``output_filename``.
    """
    tmp = tempfile.NamedTemporaryFile(delete=False, dir=tmp_dir)
    try:
        with tmp:
            for row in iterable:
                tmp.write(writer_tsv(row).encode('utf-8'))
        # Move only after the file is closed: a cross-filesystem move copies
        # the file, and would miss data still sitting in the write buffer.
        shutil.move(tmp.name, output_filename)
    finally:
        # Left behind only when writing or moving failed.
        if os.path.exists(tmp.name):
            os.remove(tmp.name)


def main():
    """Convert the CSV file named on the command line to /tmp/result.tsv."""
    if len(sys.argv) < 2:
        sys.exit("usage: %s INPUT_CSV" % sys.argv[0])
    input_file = sys.argv[1]
    write_str_into_file(
        reader_csv(os.path.abspath(os.path.expanduser(input_file))),
        "/tmp/result.tsv")


if __name__ == '__main__':
    main()