一个对账的小脚本

来源:互联网 发布:千图网淘宝首页图片 编辑:程序博客网 时间:2024/05/01 22:23

前一段时间一直在做各种对数据的事情,因为我们的业务涉及到银联钱包,并且我们的客户认为其给的某些数据有问题,所以我们写了这样一个小脚本来验证,经验证数据没有问题,不过给过来的数据需要进行一些差错处理,需要仔细一点。
废话不多说,直接上程序

#!/usr/bin/env python
# encoding: utf-8
"""Aggregate per-customer point totals from exported text files into MySQL.

Every file in ``compare.file_path`` is classified as a "get" file (points
granted) or a "pay" file (points spent).  The amounts are summed per customer
id in memory and finally inserted into the ``bm_cid_info`` table.

The script was originally written for Python 2; the syntax used here is valid
on both Python 2.7 and Python 3.
"""
import logging
import os


class compare(object):
    """Load get/pay files, total the points per cid and store the totals."""

    def __init__(self):
        self.file_path = "G:/codes/python/total"
        self.db_conn = None
        # cid -> {'addpoint': total granted, 'decpoint': total spent}
        self.__cid_info = dict()
        self.get_file_list = list()
        self.pay_file_list = list()

    def get_conn(self):
        """Open the MySQL connection and keep it in ``self.db_conn``.

        Returns:
            True on success, False if the connection could not be opened
            (the error is logged).
        """
        try:
            # Imported inside the try block so that a missing driver is
            # reported through the same logged failure path as any other
            # connection problem, and so the file parsing code can be used
            # without the driver installed.
            import pymysql
            self.db_conn = pymysql.connect(host="127.0.0.1", port=3306,
                                           user="root", passwd="205339",
                                           db="dbvip", charset='utf8')
        except Exception as e:
            logging.error("mysql get connection error [%s]", e)
            return False
        return True

    def get_filelist(self):
        """Split the files found in ``self.file_path`` into get and pay lists.

        A file whose name has ``get`` at positions 9-11 (for example
        ``20140324-get.txt``) goes to ``get_file_list``; every other file
        goes to ``pay_file_list``.

        Returns:
            0 on success, -1 if ``self.file_path`` is empty.
        """
        if not self.file_path:
            return -1
        for file_name in sorted(os.listdir(self.file_path)):
            if file_name[9:12] == 'get':
                self.get_file_list.append(file_name)
            else:
                self.pay_file_list.append(file_name)
        # The lists are instance attributes that grow on every call, so they
        # are re-sorted here rather than relying on the listing order.
        self.get_file_list.sort()
        self.pay_file_list.sort()
        logging.info("get_file_list %s", self.get_file_list)
        logging.info("pay_file_list %s", self.pay_file_list)
        return 0

    def _read_lines(self, filename):
        """Return all lines of ``filename``, or None if it cannot be read."""
        fname = self.file_path + '/' + filename
        if not os.path.exists(fname):
            logging.error("Fname %s not exists.", fname)
            return None
        if not os.access(fname, os.R_OK):
            logging.error("Fname %s not readable.", fname)
            return None
        logging.info("Open file %s...", fname)
        with open(fname) as fhandle:
            return fhandle.readlines()

    @staticmethod
    def _iter_summary(lines):
        """Yield ``(line_no, cid, points)`` from a two-column ``cid,points`` file."""
        for count, line in enumerate(lines, 1):
            # readlines() keeps the newline, so a length test never detects a
            # blank line; strip before testing.
            if not line.strip():
                continue
            rec = line.split(',')
            yield count, rec[0], rec[1]

    @staticmethod
    def _iter_legacy(lines):
        """Yield ``(line_no, cid, account, status, points)`` from a
        space-separated file, skipping its first (header) line."""
        for count, line in enumerate(lines, 1):
            if count == 1:
                continue
            if not line.strip():
                continue
            rec = line.split(' ')
            yield count, rec[1], rec[2], rec[14], rec[13]

    @staticmethod
    def _iter_csv(lines, mantissa_end):
        """Yield ``(line_no, cid, account, status, points)`` from a quoted,
        comma-separated file, ignoring rows dated 2015/07/01 or later.

        The amount in column 10 is rebuilt as mantissa * 10 ** exponent, with
        the mantissa taken from ``rec[10][1:mantissa_end]`` and the exponent
        from the last three characters of the field.
        """
        for count, line in enumerate(lines, 1):
            if not line.strip():
                continue
            rec = line.split(',')
            cid = rec[0].strip('\"')
            account = rec[1].strip('\"')
            date = rec[3].strip('\"')
            if date >= "2015/07/01":
                continue
            points = float(rec[10][1:mantissa_end]) * (10 ** int(rec[10][-3:]))
            # NOTE(review): the 4-character slice presumably targets a
            # two-character status held as 2-byte characters in a Python 2
            # byte string -- confirm before relying on it in Python 3 text mode.
            status = rec[11].strip('\"')[:4]
            yield count, cid, account, status, points

    @staticmethod
    def _log_progress(count):
        """Log a progress message every 1000 lines."""
        if count % 1000 == 0:
            logging.info("processing line %d", count)

    def _credit(self, cid, points):
        """Add ``points`` to the granted total of ``cid``, creating its entry
        on first use."""
        if cid not in self.__cid_info:
            self.__cid_info[cid] = dict()
            self.__cid_info[cid]['addpoint'] = 0
            self.__cid_info[cid]['decpoint'] = 0
        self.__cid_info[cid]['addpoint'] += float(points)

    def _debit(self, cid, points, count):
        """Add ``points`` to the spent total of ``cid``.

        Returns:
            False (after logging) if ``cid`` never received points, True
            otherwise.
        """
        if cid not in self.__cid_info:
            logging.info("the cid has not been send point %s,line %d", cid, count)
            return False
        self.__cid_info[cid]['decpoint'] += float(points)
        return True

    def load_get_file(self):
        """Accumulate granted points from every file in ``get_file_list``.

        Three layouts are handled: the two-column ``20140324-get.txt``, the
        space-separated files whose name sorts before ``201411``, and the
        quoted comma-separated files after that.  Outside the two-column
        file, only rows with an empty account and status '成功' are counted.

        Returns:
            0 on success, -1 if a file is missing or unreadable.
        """
        for filename in self.get_file_list:
            flist = self._read_lines(filename)
            if flist is None:
                return -1
            if filename == "20140324-get.txt":
                for count, cid, points in self._iter_summary(flist):
                    self._credit(cid, points)
                    self._log_progress(count)
                continue
            if filename[:6] < "201411":
                rows = self._iter_legacy(flist)
            else:
                # NOTE(review): the pay loader slices rec[10][1:5] where this
                # one uses [1:8]; both are kept as found -- confirm against
                # the export format.
                rows = self._iter_csv(flist, 8)
            for count, cid, account, status, points in rows:
                if account != '' or status != '成功':
                    logging.info("account %s,status %s line %d", account, status, count)
                    continue
                self._credit(cid, points)
                self._log_progress(count)
        return 0

    def load_pay_file(self):
        """Accumulate spent points from every file in ``pay_file_list``.

        The same three layouts as in ``load_get_file`` are handled.  Outside
        the two-column file, only rows with a non-empty account and status
        '成功' are counted.  Rows whose cid never received points are logged
        and skipped.

        Returns:
            0 on success, -1 if a file is missing or unreadable.
        """
        for filename in self.pay_file_list:
            flist = self._read_lines(filename)
            if flist is None:
                return -1
            if filename == "20140324-pay.txt":
                for count, cid, points in self._iter_summary(flist):
                    if self._debit(cid, points, count):
                        self._log_progress(count)
                continue
            if filename[:6] < "201411":
                rows = self._iter_legacy(flist)
                skip_msg = "account %s,status %s,line %d"
            else:
                # NOTE(review): mantissa slice is [1:5] here but [1:8] in the
                # get loader; kept as found -- confirm against the export
                # format.
                rows = self._iter_csv(flist, 5)
                skip_msg = "account %s,status %s line %d"
            for count, cid, account, status, points in rows:
                if account == '' or status != '成功':
                    logging.info(skip_msg, account, status, count)
                    continue
                if self._debit(cid, points, count):
                    self._log_progress(count)
        return 0

    def commit_data(self):
        """Insert one ``bm_cid_info`` row per cid, committing every 10000 rows.

        Returns:
            True when every row was inserted and committed, False if an
            insert failed (the error is logged).
        """
        count = 0
        cur = self.db_conn.cursor()
        try:
            for cid, totals in self.__cid_info.items():
                count += 1
                try:
                    # cid comes from the input files, so it is passed as a
                    # bound parameter rather than formatted into the SQL text.
                    cur.execute(
                        "insert into bm_cid_info(cid,add_point,dec_point) "
                        "values(%s,%s,%s)",
                        (cid,
                         round(totals['addpoint'], 2),
                         round(totals['decpoint'], 2)))
                except Exception as e:
                    logging.error("mysql insert bm_cid_info error [%s]", e)
                    return False
                if count % 10000 == 0:
                    self.db_conn.commit()
                    logging.info("handled user %d", count)
            logging.info("handled total user %d", count)
            self.db_conn.commit()
        finally:
            cur.close()
        return True

    def run(self):
        """Run the whole job: connect, list files, load them, store totals.

        Returns:
            0 on success, -1 as soon as any step fails.
        """
        # get_conn() and commit_data() report success as True/False, while
        # the other steps use 0/-1; each is tested in its own convention.
        if not self.get_conn():
            return -1
        if self.get_filelist() < 0:
            return -1
        if self.load_get_file() < 0:
            return -1
        if self.load_pay_file() < 0:
            return -1
        if not self.commit_data():
            return -1
        return 0


if __name__ == '__main__':
    comp = compare()
    logging.basicConfig(filename=r"G:\codes\logs\compare.log",
                        format="%(asctime)s %(levelname)s[%(process)d]:%(message)s",
                        level="INFO")
    if comp.run() < 0:
        logging.info("compare failed")
    else:
        logging.info("compare succeed")

一个简单的小程序,说一下流程:我们有两种文件需要读取,一种是get.txt,一种是pay.txt。这两种文件格式基本相同但是有些差别,所以写了两个函数分别进行处理;中间因为文件格式有过变动,所以需要好几个分支来处理。处理完之后会用一个字典把得到的所有信息汇总起来,最后提交到数据库里去。只实现了这么一个小功能,不过现在有了个新的想法:因为上一次看过一个讲python闭包的小视频,等一会试一下,看看能不能把这个程序改写得更简单一点,要是可以的话再来给大家分享。

0 0
原创粉丝点击