Mysqlのデータを変更する(MySQL-python==1.2.5)

データベースのqueryを整形する。データベースに突っ込む前に"をつけるのが必要なのとないのがあって訳わからん。 ついでにutf-8に変換しないといけないのと、conn.commit()しないとinsertが反映されないということでばりつまづいた。

# -*- coding: utf-8 -*-
# vim:tabstop=4:shiftwidth=4:expandtab

import sys
import MySQLdb
from datetime import datetime, timedelta
import argparse


def alter_sql_query(conn, day):
    date = datetime.strptime(day, "%Y%m%d")
    next_day = date + timedelta(days=1)
    cur = conn.cursor()
    try:
        sql = '''
            SELECT `[id]`, `[date]`, `[query]`
            FROM `[table名]`
            WHERE `[date]` >= \"{day}\"
            AND `[date]` < \"{next_day}\"
        '''.format(day=date.strftime("%Y-%m-%d"), next_day=next_day.strftime("%Y-%m-%d"))
        cur.execute(sql)
        rows = []
        ori_rows = cur.fetchall()
        for row in ori_rows:
            # [それなりの処理、"をつけたりつけなかったり。datetimeは放置で大丈夫な感じ]
            rows.append(tuple(row))
        bind = ','.join([
            '({0})'.format(','.join(['%s'] * 3))
        ] * len(rows))
        sql = '''
            INSERT INTO `[table名]` (`[id]`, `[date]`, `query`)
            VALUE {bind}
            ON DUPLICATE KEY UPDATE `query` = VALUES(`query`)
        '''.format(bind=bind, )
        params = []
        for v in rows:
            params.extend(v)
        # import pdb; pdb.set_trace()
        cur.execute(sql, params)
    except Exception as e1:
        raise e1
    finally:
        cur.close()


def execute(day):
    conn = MySQLdb.connect(host="127.0.0.1", db="zoff", user="zoff", passwd="zoff", charset="utf8")

    check = raw_input('(yes/no): ')
    if check != "yes":
        sys.exit(0)
    alter_sql_query(conn, day)
    conn.commit()
    conn.close()


if __name__ == '__main__':
    print 'the operation start', datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', action='store', dest='day',
                        help='checkday')
    day = parser.parse_args().day
    execute(day)
    print 'the operation finish', datetime.now().strftime("%Y-%m-%d %H:%M:%S")

rishida.hatenablog.com

SQLとSolrの情報を削除する方法

簡単な処理をバッチ化した。MySQLにアクセスする方法がわかったことが大きな収穫。

# -*- coding: utf-8 -*-
# vim:tabstop=4:shiftwidth=4:expandtab

import sys
import MySQLdb
import json
import urllib2
import argparse
from datetime import datetime


def check_jan_sql(jancode, conn, cond):
    print 'the jancode check(SQL) start', datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    cur = conn.cursor()
    try:
        sql = '''
            SELECT janCd,name FROM [table名] WHERE janCd = %s
        '''
        cur.execute(sql, [jancode, ])
        rows = cur.fetchall()
        if len(rows) == cond:
            for row in rows:
                janCd, name = row
                print '%s\t%s' % (janCd, name)
            return True
    except Exception as e1:
        raise e1
    finally:
        cur.close()
        return False


def check_jan_solr(jancode, cond):
    print 'the jancode check(Solr) start', datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    url_check = 'http://localhost:8983/solr/[core名]/select?q=(janCd:{jancode})'.format(
        jancode=jancode
    ) + '&fl=janCd&wt=json&indent=true'
    json_string = urllib2.urlopen(url_check).read()
    parsed = json.loads(json_string)
    response = parsed.get(u'response')
    print json_string
    if response and response.get(u'numFound') == cond:
        return True
    return False


def delete_jan_sql(jancode, conn):
    cur = conn.cursor()
    try:
        sql = '''
            DELETE FROM [table名] WHERE janCd = %s
        '''
        cur.execute(sql, [jancode, ])
    except Exception as e1:
        raise e1
    finally:
        cur.close()


def delete_jan_solr(jancode):
    request = urllib2.Request(
        'http://localhost:8983/solr/[core名]/update?wt=json&indent=true',
        data="<delete><query>janCd:%s</query></delete>" % jancode,
        headers={
            "Content-Type": "text/xml; charset=utf-8",
        },
    )
    json_string = urllib2.urlopen(request).read()
    parsed = json.loads(json_string)
    print parsed


def commit_delete_jan_solr():
    request = urllib2.Request(
        'http://localhost:8983/solr/[core名]/update?wt=json&indent=true',
        data="<commit />",
        headers={
            "Content-Type": "text/xml; charset=utf-8",
        },
    )
    json_string = urllib2.urlopen(request).read()
    parsed = json.loads(json_string)
    print parsed


def execute(jancode, checkonly):
    conn = MySQLdb.connect(host="localhost", db=[db名], user=[user名], passwd=[パスワード], charset="utf8")
    if check_jan_sql(jancode, conn, 1) and check_jan_solr(jancode, 1):
        print 'precheck OK'
    else:
        print 'precheck NG! %s is not in db' % jancode
        sys.exit(1)

    if checkonly:
        conn.close()
        sys.exit(0)

    print 'JanCd: %s OK???' % jancode
    check = raw_input('(yes/no): ')
    if check != "yes":
        sys.exit(0)

    delete_jan_sql(jancode, conn)
    delete_jan_solr(jancode)
    commit_delete_jan_solr()

    if check_jan_sql(jancode, conn, 0) and check_jan_solr(jancode, 0):
        print 'Delete check OK'
    else:
        print 'Delete check NG! %s is in db' % jancode
        sys.exit(1)

    conn.close()


if __name__ == '__main__':
    print 'the jancode delete operation start', datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    parser = argparse.ArgumentParser()
    parser.add_argument('-j', action='store', dest='jan_code',
                        help='Delete the jan code')
    parser.add_argument('--check', action='store_true', dest='checkonly', default=False,
                        help='Check only')
    jancode = parser.parse_args().jan_code
    checkonly = parser.parse_args().checkonly
    if jancode:
        execute(jancode, checkonly)
    else:
        print 'No jan code!'
    print 'the jancode delete operation finish', datetime.now().strftime("%Y-%m-%d %H:%M:%S")

Apacheのアクセスログはこんな感じでコマンドから調べられる。

切り取ってしまえば、調べが簡単

$ cut -d " " -f 1-8 access_log | grep "google" | grep -ve "10.0.0.1" -vEe "10\.0\.0\.(4[8-9]|5[0-9]|6[0-3])" | wc -l

ossfan.net

orangain.hatenablog.com

ksmzn.hatenablog.com

+α帯域制限をかけながらscpなどは行ったほうがいい。

d.hatena.ne.jp

リストから重複した要素を削除したい

リストの中のリストの一部が重複した場合の処理

datas = [['111111', ], ['111111', ], ]
seen = set()
seen_add = seen.add
datas = [x for x in datas if x[0] not in seen and not seen_add(x[0])]
print datas

#  [['111111']]

www.lifewithpython.com

日本語のcsvを取り扱う際便利(python2)

csvunicode変換は手間取るけど、良さげな変換をクラスにしてまとめている人がいたので拝借した。

import csv
import sys
import tempfile
import shutil
import os

calendar_dict = {
    "January": "1",
    "February": "2",
    "March": "3",
    "April": "4",
    "May": "5",
    "June": "6",
    "July": "7",
    "August": "8",
    "September": "9",
    "October": "10",
    "November": "11",
    "December": "12",
}


class UnicodeCsvReader:
    """csv.reader wrapper which decodes each value with designated encoding"""
    def __init__(self, iterable, dialect='excel', encoding="utf-8", *args, **kwds):
        self.reader = csv.reader(iterable, dialect=dialect, *args, **kwds)
        self.encoding = encoding
        self.dialect = self.reader.dialect
        self.line_num = 0

    def __iter__(self):
        return self

    def decode(self, value):
        return value and value.decode(self.encoding) or value

    def next(self):
        # csv.reader.next returns a list of values of next row
        cols = [self.decode(x) for x in self.reader.next()]
        self.line_num = self.reader.line_num
        return cols


class UnicodeCsvDictReader(csv.DictReader):
    def __init__(self, f, fieldnames=None, restkey=None, restval=None,
                 dialect="excel", encoding="utf-8", *args, **kwds):
        csv.DictReader.__init__(
            self, f, fieldnames, restkey, restval, dialect, *args, **kwds)
        self.encoding = encoding
        self.reader = UnicodeCsvReader(f, encoding=encoding)


def reader_csv(input_file, e="utf-8"):
    with open(input_file, 'r') as f:
        reader = UnicodeCsvReader(f)
        for row in reader:
            yield row


# ここの変換処理にてすべてOK
def writer_tsv(row):
    try:
        date = row[0].split()
        if date[0] in calendar_dict.keys():
            row[0] = "-".join([date[2], calendar_dict.get(date[0], ''), date[1]])
        row[0] = row[0].split(",")[0]
        row[1] = row[1].replace(u' ', u' ').replace(u'+', u' ')
    except Exception as e1:
        print e1, row
    finally:
        print row
        return u'\t'.join(row) + u'\n'


def write_str_into_file(iterable, output_filename,):
    with tempfile.NamedTemporaryFile(delete=False, dir='/var/tmp',) as f:
        for row in iterable:
            f.write(writer_tsv(row).encode('utf-8'))
        shutil.move(f.name, output_filename)
    if os.path.exists(f.name):
        os.remove(f.name)


def main():
    input_file = sys.sys.argv[1]
    write_str_into_file(
        reader_csv(os.path.abspath(os.path.expanduser(input_file))),
        "/tmp/result.tsv")


if __name__ == '__main__':
    main()

msiz.hatenablog.jp

インデックスを理解する。

MySQLはインデックスを使うとうまくチューニングできるらしい。 なんかいいサイトないかと探していると発見。

qiita.com

リンク先に一つずつアクセスして行って、学んでいこうとしてる。