MySQLのデータを変更する(MySQL-python==1.2.5)
データベースのqueryを整形する。データベースに投入する前に"を付ける必要がある値とない値が混在していて分かりにくい。さらにutf-8への変換が必要なのと、conn.commit()しないとINSERTが反映されないという点でかなりつまずいた。
# -*- coding: utf-8 -*- # vim:tabstop=4:shiftwidth=4:expandtab import sys import MySQLdb from datetime import datetime, timedelta import argparse def alter_sql_query(conn, day): date = datetime.strptime(day, "%Y%m%d") next_day = date + timedelta(days=1) cur = conn.cursor() try: sql = ''' SELECT `[id]`, `[date]`, `[query]` FROM `[table名]` WHERE `[date]` >= \"{day}\" AND `[date]` < \"{next_day}\" '''.format(day=date.strftime("%Y-%m-%d"), next_day=next_day.strftime("%Y-%m-%d")) cur.execute(sql) rows = [] ori_rows = cur.fetchall() for row in ori_rows: # [それなりの処理、"をつけたりつけなかったり。datetimeは放置で大丈夫な感じ] rows.append(tuple(row)) bind = ','.join([ '({0})'.format(','.join(['%s'] * 3)) ] * len(rows)) sql = ''' INSERT INTO `[table名]` (`[id]`, `[date]`, `query`) VALUE {bind} ON DUPLICATE KEY UPDATE `query` = VALUES(`query`) '''.format(bind=bind, ) params = [] for v in rows: params.extend(v) # import pdb; pdb.set_trace() cur.execute(sql, params) except Exception as e1: raise e1 finally: cur.close() def execute(day): conn = MySQLdb.connect(host="127.0.0.1", db="zoff", user="zoff", passwd="zoff", charset="utf8") check = raw_input('(yes/no): ') if check != "yes": sys.exit(0) alter_sql_query(conn, day) conn.commit() conn.close() if __name__ == '__main__': print 'the operation start', datetime.now().strftime("%Y-%m-%d %H:%M:%S") parser = argparse.ArgumentParser() parser.add_argument('-d', action='store', dest='day', help='checkday') day = parser.parse_args().day execute(day) print 'the operation finish', datetime.now().strftime("%Y-%m-%d %H:%M:%S")
SQLとSolrの情報を削除する方法
簡単な処理をバッチ化した。MySQLにアクセスする方法がわかったことが大きな収穫。
# -*- coding: utf-8 -*- # vim:tabstop=4:shiftwidth=4:expandtab import sys import MySQLdb import json import urllib2 import argparse from datetime import datetime def check_jan_sql(jancode, conn, cond): print 'the jancode check(SQL) start', datetime.now().strftime("%Y-%m-%d %H:%M:%S") cur = conn.cursor() try: sql = ''' SELECT janCd,name FROM [table名] WHERE janCd = %s ''' cur.execute(sql, [jancode, ]) rows = cur.fetchall() if len(rows) == cond: for row in rows: janCd, name = row print '%s\t%s' % (janCd, name) return True except Exception as e1: raise e1 finally: cur.close() return False def check_jan_solr(jancode, cond): print 'the jancode check(Solr) start', datetime.now().strftime("%Y-%m-%d %H:%M:%S") url_check = 'http://localhost:8983/solr/[core名]/select?q=(janCd:{jancode})'.format( jancode=jancode ) + '&fl=janCd&wt=json&indent=true' json_string = urllib2.urlopen(url_check).read() parsed = json.loads(json_string) response = parsed.get(u'response') print json_string if response and response.get(u'numFound') == cond: return True return False def delete_jan_sql(jancode, conn): cur = conn.cursor() try: sql = ''' DELETE FROM [table名] WHERE janCd = %s ''' cur.execute(sql, [jancode, ]) except Exception as e1: raise e1 finally: cur.close() def delete_jan_solr(jancode): request = urllib2.Request( 'http://localhost:8983/solr/[core名]/update?wt=json&indent=true', data="<delete><query>janCd:%s</query></delete>" % jancode, headers={ "Content-Type": "text/xml; charset=utf-8", }, ) json_string = urllib2.urlopen(request).read() parsed = json.loads(json_string) print parsed def commit_delete_jan_solr(): request = urllib2.Request( 'http://localhost:8983/solr/[core名]/update?wt=json&indent=true', data="<commit />", headers={ "Content-Type": "text/xml; charset=utf-8", }, ) json_string = urllib2.urlopen(request).read() parsed = json.loads(json_string) print parsed def execute(jancode, checkonly): conn = MySQLdb.connect(host="localhost", db=[db名], user=[user名], passwd=[パスワード], 
charset="utf8") if check_jan_sql(jancode, conn, 1) and check_jan_solr(jancode, 1): print 'precheck OK' else: print 'precheck NG! %s is not in db' % jancode sys.exit(1) if checkonly: conn.close() sys.exit(0) print 'JanCd: %s OK???' % jancode check = raw_input('(yes/no): ') if check != "yes": sys.exit(0) delete_jan_sql(jancode, conn) delete_jan_solr(jancode) commit_delete_jan_solr() if check_jan_sql(jancode, conn, 0) and check_jan_solr(jancode, 0): print 'Delete check OK' else: print 'Delete check NG! %s is in db' % jancode sys.exit(1) conn.close() if __name__ == '__main__': print 'the jancode delete operation start', datetime.now().strftime("%Y-%m-%d %H:%M:%S") parser = argparse.ArgumentParser() parser.add_argument('-j', action='store', dest='jan_code', help='Delete the jan code') parser.add_argument('--check', action='store_true', dest='checkonly', default=False, help='Check only') jancode = parser.parse_args().jan_code checkonly = parser.parse_args().checkonly if jancode: execute(jancode, checkonly) else: print 'No jan code!' print 'the jancode delete operation finish', datetime.now().strftime("%Y-%m-%d %H:%M:%S")
Apacheのアクセスログはこんな感じでコマンドから調べられる。
必要なフィールドだけ切り出してしまえば、調べるのが簡単になる。
# Count access_log lines mentioning "google", excluding internal addresses.
# Dots are escaped so "10\.0\.0\.1" matches only the literal IP (an
# unescaped "." matches any character and over-excludes lines).
$ cut -d " " -f 1-8 access_log | grep "google" | grep -ve "10\.0\.0\.1" -vEe "10\.0\.0\.(4[8-9]|5[0-9]|6[0-3])" | wc -l
なお、scpなどで転送する際は帯域制限をかけながら行ったほうがいい。
リストから重複した要素を削除したい
リストの中のリストの一部が重複した場合の処理
# Deduplicate a list of lists by each inner list's first element,
# keeping the first occurrence and preserving order.
datas = [['111111', ], ['111111', ], ]
seen = set()
seen_add = seen.add
# set.add returns None (falsy), so `not seen_add(x[0])` is always True and
# merely records x[0] as a side effect of evaluating the filter expression.
datas = [x for x in datas if x[0] not in seen and not seen_add(x[0])]
# print(value) with a single argument works in both Python 2 and 3
# (the original `print datas` statement is Python-2-only).
print(datas)  # [['111111']]
日本語のcsvを取り扱う際便利(python2)
csvのunicode変換は手間取るけど、良さげな変換をクラスにしてまとめている人がいたので拝借した。
import csv import sys import tempfile import shutil import os calendar_dict = { "January": "1", "February": "2", "March": "3", "April": "4", "May": "5", "June": "6", "July": "7", "August": "8", "September": "9", "October": "10", "November": "11", "December": "12", } class UnicodeCsvReader: """csv.reader wrapper which decodes each value with designated encoding""" def __init__(self, iterable, dialect='excel', encoding="utf-8", *args, **kwds): self.reader = csv.reader(iterable, dialect=dialect, *args, **kwds) self.encoding = encoding self.dialect = self.reader.dialect self.line_num = 0 def __iter__(self): return self def decode(self, value): return value and value.decode(self.encoding) or value def next(self): # csv.reader.next returns a list of values of next row cols = [self.decode(x) for x in self.reader.next()] self.line_num = self.reader.line_num return cols class UnicodeCsvDictReader(csv.DictReader): def __init__(self, f, fieldnames=None, restkey=None, restval=None, dialect="excel", encoding="utf-8", *args, **kwds): csv.DictReader.__init__( self, f, fieldnames, restkey, restval, dialect, *args, **kwds) self.encoding = encoding self.reader = UnicodeCsvReader(f, encoding=encoding) def reader_csv(input_file, e="utf-8"): with open(input_file, 'r') as f: reader = UnicodeCsvReader(f) for row in reader: yield row # ここの変換処理にてすべてOK def writer_tsv(row): try: date = row[0].split() if date[0] in calendar_dict.keys(): row[0] = "-".join([date[2], calendar_dict.get(date[0], ''), date[1]]) row[0] = row[0].split(",")[0] row[1] = row[1].replace(u' ', u' ').replace(u'+', u' ') except Exception as e1: print e1, row finally: print row return u'\t'.join(row) + u'\n' def write_str_into_file(iterable, output_filename,): with tempfile.NamedTemporaryFile(delete=False, dir='/var/tmp',) as f: for row in iterable: f.write(writer_tsv(row).encode('utf-8')) shutil.move(f.name, output_filename) if os.path.exists(f.name): os.remove(f.name) def main(): input_file = sys.sys.argv[1] 
write_str_into_file( reader_csv(os.path.abspath(os.path.expanduser(input_file))), "/tmp/result.tsv") if __name__ == '__main__': main()