实习程序1

来源:互联网 发布:淘宝关键词重复有用吗 编辑:程序博客网 时间:2024/06/15 19:09
# coding: utf-8# In[29]:#import the necessary modulesimport pymysql as mysqldbimport pandas as pdimport pymysql.cursorsimport astimport influxdbimport datetimeimport osfrom influxdb import InfluxDBClientfrom influxdb import DataFrameClientfrom sqlalchemy import create_engine# -*- coding: utf-8 -*-#encoding=utf-8# In[30]:#client = DataFrameClient(host=system_variable['INFLUXDB_HOST'], port=int(system_variable['INFLUXDB_PORT']),username=system_variable['INFLUXDB_USER'],                                 password=system_variable['INFLUXDB_PASSWORD'],                                 database='original_data_db')#There are four types of input for table_type :'electric_formula','gas_formula','steam_formula','hotoil_formula'#There are four types of input for table_name :'m_electric_course_hour','m_hotoil_course_hour','m_steam_course_hour,'m_gas_course_hour'#输入变量table_type有四种情况,分别是'electric_formula','gas_formula','steam_formula','hotoil_formula',分别代表四种能源的情况#输入变量table_name有四种情况,分别是'm_electric_course_hour','m_hotoil_course_hour','m_steam_course_hour,'m_gas_course_hour',也分别代表四种能源的情况#print the system_variable to observe#打印出环境变量查看起是否存在system_variable = os.environ#write_db = system_variable['MYSQL_DST_DB']#read_db = system_variable['MYSQL_SRC_DB']print(system_variable)#print(system_variable)#print("system_variable['MYSQL_HOST'] : "+system_variable['MYSQL_HOST'])#print("system_variable['MYSQL_PORT'] : "+system_variable['MYSQL_PORT'])#print("system_variable['MYSQL_USER'] : "+system_variable['MYSQL_USER'])#print("system_variable['MYSQL_PASSWORD'] : "+system_variable['MYSQL_PASSWORD'])#print("system_variable['INFLUXDB_HOST'] : "+system_variable['INFLUXDB_HOST'])#print("system_variable['INFLUXDB_PORT'] : "+system_variable['INFLUXDB_PORT'])#print("system_variable['INFLUXDB_USER'] : "+system_variable['INFLUXDB_USER'])#print("system_variable['INFLUXDB_PASSWORD'] : "+system_variable['INFLUXDB_PASSWORD'])#define the connect function to connect the mysql through system_variable#定义通过环境变量连接Mysql的函数def through_environ_connect():    connection = pymysql.connect(host=system_variable['MYSQL_HOST'],                                 port=int(system_variable['MYSQL_PORT']),                                 user=system_variable['MYSQL_USER'],                                 password=system_variable['MYSQL_PASSWORD'],                                 db=system_variable['MYSQL_DST_DB'],                                 charset='utf8',                                 cursorclass=pymysql.cursors.DictCursor)    cursor = connection.cursor()    print("environment way")    return connection,cursor#define the connect functionto connect the mysql through specific address#定义通过特定地址访问Mysql的函数def through_direct_connect():    connection = pymysql.connect(host='120.77.86.1',                                 port=6091,                                 user='root',                                 password='poi-t#admin',                                 db='poit_cloud2.0',                                 charset='utf8',                                 cursorclass=pymysql.cursors.DictCursor)    cursor = connection.cursor()    print("normal local way")    return connection,cursor#It's a function for updating one middle table#这个函数用于更新某一个特定的中间表,根据输入确定要更新的中间表def one_update_process(table_type,table_name_hour,table_name_day):    # Connect to the mysql database     # 连接mysql数据库    try:        connection,cursor = through_environ_connect()    except:        connection,cursor = through_direct_connect()    #get the e_course table     #获得e_course表格    try:        sql = "select * from " + "`" + system_variable['MYSQL_SRC_DB'] + "`" + "." + "e_course"    except:        sql = "select * from e_course"    cursor.execute(sql)    e_course = cursor.fetchall()    #close the mysql database    #关闭mysql数据库    cursor.close()    connection.close()        #It's a function for extracting variables from an equation     #定义抽取公式中变量的函数    def extract_variable(equation):        names = [            node.id for node in ast.walk(ast.parse(str(equation)))             if isinstance(node, ast.Name)                 ]        return names        #It's a function for creating the {course:course_dial} datastructure, it looks like this [{'a':[a1,a2]},{'b':[b1,b2]}]    # the a,b is the course_id, the a1,a2 or b1,b2 are the course_dial which connect with relative course_id    #函数用于构造{course:course_dial}类型的数据结构,改数据结构类似于 [{'a':[a1,a2]},{'b':[b1,b2]}],a,b是代表了班组代号,    #a1,a2或者b1,b2是相关班组的表格信息,即a1中存取了班组a中,表格a1的读数信息。    #输入e_course_data表示    def create_course_dial(e_course_data,formual='e_formula'):        formula = []        course_id = []        [formula.append(each[str(formual)]) for each in e_course_data]        [course_id.append(each['id']) for each in e_course_data]        middle_process = dict(zip(course_id, formula))        course_dial = []        for key, value in middle_process.items():            course_dial.append({key:extract_variable(value)})        return course_dial            #table_type = 'electric_formula','gas_formula','steam_formula','hotoil_formula'    #create different relationship between course and dial in different energy type    #获得班组与相关所有表格的信息,course_dial,同时获取班组与需要更新表格能源类型相关的表格信息。type_course_dial,    #比如,若本次更新电相关的信息,则type_course_dial只构造该班组和电有关的表格的数据结构    #course_dial = create_course_dial(e_course)    type_course_dial = create_course_dial(e_course,formual=str(table_type))        #connect to mysql     #连接Mysql数据库    try:        connection,cursor = through_environ_connect()    except:        connection,cursor = through_direct_connect()        #connect to Influxdb     #连接 Influxdb 数据库    try:        client = DataFrameClient(host=system_variable['INFLUXDB_HOST'], port=int(system_variable['INFLUXDB_PORT'])                                 ,username=system_variable['INFLUXDB_USER'],                                 password=system_variable['INFLUXDB_PASSWORD'],                                 database='original_data_db')        print("Influxdb environment way")    except:        client = DataFrameClient(host='120.77.86.1', port=6093, username='root', password='poi-t#admin', database='original_data_db')        print("Influxdb normal local way")        # get the different part of two time of s_output_product table,t_output_product is the s_output_product last time    # 获得s_output_product table,t_output_product的差集部分,t_output_product表示上一时刻的s_output_product表格    try:                sql = """                SELECT  's_product_output_course' AS 'set', s.*                FROM  """ + "`" + system_variable['MYSQL_SRC_DB'] + "`" + "."  + """s_product_output_course s                WHERE   ROW(s.eid,s.tid,s.id,s.type,s.output,s.year,s.month,s.day,s.course_name,s.begin_time,s.end_time) NOT IN                (                SELECT  *                FROM    t_product_output_course                )                UNION ALL                SELECT  't_product_output_course' AS 'set', s1.*                FROM     t_product_output_course s1                WHERE   ROW(s1.eid,s1.tid,s1.id,s1.type,s1.output,s1.year,s1.month,s1.day,s1.course_name,s1.begin_time,s1.end_time) NOT IN                (                SELECT  *                FROM   """ + "`" + system_variable['MYSQL_SRC_DB'] + "`" + "."  + """s_product_output_course                )            """    except:        sql = """                SELECT  's_product_output_course' AS 'set', s.*                FROM     s_product_output_course s                WHERE   ROW(s.eid,s.tid,s.id,s.type,s.output,s.year,s.month,s.day,s.course_name,s.begin_time,s.end_time) NOT IN                (                SELECT  *                FROM    t_product_output_course                )                UNION ALL                SELECT  't_product_output_course' AS 'set', s1.*                FROM     t_product_output_course s1                WHERE   ROW(s1.eid,s1.tid,s1.id,s1.type,s1.output,s1.year,s1.month,s1.day,s1.course_name,s1.begin_time,s1.end_time) NOT IN                (                SELECT  *                FROM    s_product_output_course                )            """    #get the change part of the s_product_output_course    #得到差集部分    cursor.execute(sql)    different_s_product = cursor.fetchall()    #close the local database    #关闭mysql数据库    cursor.close()    connection.close()        #print("this is the change part of s_product_output_course:")    #print(different_s_product)     #It's a function for changing the timestamp to right type and add year,month,day,hour columns    #这个函数用于将时间数据转换为正确的格式,并且增加年,月和日的列,用于与中间表的格式相对应,输入是DataFrame返回格式为DataFrame    def change_to_right_type(DataFrame,course_year,course_month,course_day,judge_day_or_hour):        DataFrame = DataFrame.reset_index()        DataFrame = DataFrame.rename(columns={'index':'dt'})        course_year = course_year        course_month = course_month        course_day = course_day        judge_day_or_hour = judge_day_or_hour        def change_right_type(time):            return str(time).rsplit('+')[0]                    DataFrame['dt'] = DataFrame['dt'].apply(change_right_type)                def convert_str_to_datetime(time_str):            #return datetime.datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S")            #放回时间加8个小时            try:                time_str = datetime.datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S")            except:                time_str = time_str.rsplit('.')[0]                time_str = datetime.datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S")            time_str =  time_str + datetime.timedelta(hours=8)            return time_str         DataFrame['dt'] = DataFrame['dt'].apply(convert_str_to_datetime)        def get_the_year(time_str):            return time_str.year        def get_the_month(time_str):            return time_str.month        def get_the_day(time_str):            return time_str.day        def get_the_hour(time_str):            return time_str.hour                            if judge_day_or_hour == 'middle_day':            DataFrame['year'] = course_year            DataFrame['month'] = course_month            DataFrame['day'] = course_day            DataFrame['hour'] = DataFrame['dt'].apply(get_the_hour)        else:            DataFrame['year'] = DataFrame['dt'].apply(get_the_year)            DataFrame['month'] = DataFrame['dt'].apply(get_the_month)            DataFrame['day'] = DataFrame['dt'].apply(get_the_day)            DataFrame['hour'] = DataFrame['dt'].apply(get_the_hour)                    return DataFrame        #middle process for the data,because the date get from the influxdb need some handling,and add some columns    #so the dataframe can like the middle table    #数据处理的中间过程,因为在Influxdb中group by获得的数据需要一定的处理。获取相关的表格信息,企业信息等。    #输入query_list是从inlufxdb中group by出来的数据,course_id,course_name如名所示,output_id表示的一种标识,其在s_output表中是    #主键id,唯一标识。在中间表中表示为course_output_id,即通过s_表的改变,在中间表中发生了改变的条目,加上对应关系。    def middle_add_column_and_change_type(query_list,course_id,course_name,course_output_id,course_year,course_month,course_day,judge_day_or_hour):                course_year = course_year        course_month = course_month        course_day = course_day        judge_day_or_hour = judge_day_or_hour        for key1,value1 in query_list.items():            add_eid = key1[1][0][1]            add_tag_name = key1[1][1][1]            add_course_id = key1 #获得的是索引的表名,比如:'s_electric_meter_data'                     value1['eid'] = add_eid            value1['tag_name'] = add_tag_name            value1['course_id'] = course_id            value1['course_output_id'] = course_output_id            value1['course_name'] = course_name            return change_to_right_type(value1,course_year,course_month,course_day,judge_day_or_hour)      #table_type = 'electric_formula','gas_formula','steam_formula','hotoil_formula'    #decide the key words or table name query from influxdb depend on table_type       #根据输入来确定在influxdb中索引的表名和需要查找的tag_name    if table_type == 'electric_formula':        energy_type_name = 'E'        meter_table = "s_electric_meter_data"    if table_type == 'gas_formula':          energy_type_name = 'TR_LJ'        meter_table = "s_gas_meter_data"    if table_type == 'steam_formula':        energy_type_name = 'F_TTL'        meter_table = "s_steam_meter_data"    if table_type == 'hotoil_formula':        energy_type_name = 'HEAT_FLUX_TTL'        meter_table = "s_hotoil_meter_data"                    def change_time_in_influxdb_sql(time):        time = str(time)        time = time.split(' ')[0] + 'T'+time.split(' ')[1] + '+08:00'        return time        #construct the different part of s_product_output data    #从s_product_的差集中来构造所需的数据    #输入meter_table代表influxdb中的表名,change_part_of_s_product表示s_product中的差集部分    #import_course_dial表示和相关能源相关的表格信息,energy_type_name表示在influxdb中需要查找的tag_name    def from_different_part_s_product_and_construct_right_type(meter_table,change_part_of_s_product,import_course_dial,energy_type_name):        course_hour_list3 = []        course_day_list3 = []        # for each change record in s_product_output do the next operation to get the datastructure        # 根据s_product_out差集中的每条记录构造需要的数据结构        for each1_change_record in change_part_of_s_product:            eid = each1_change_record['eid']            course_id = each1_change_record['tid']            begin_time = each1_change_record['begin_time']            end_time = each1_change_record['end_time']            course_output_id = each1_change_record['id']            course_name = str(each1_change_record['course_name'])            course_year = each1_change_record['year']            course_month = each1_change_record['month']            course_day = each1_change_record['day']            #print("This is the course {}".format(course_id))            #print(course_name)                        #the element in list4 is the information about one specific tag,so list3 is the last whole data            #the data structure like this [{'a':[a1,a2]},{'b':[b1,b2]}],a,b mean different course,            #a1,a2,b1,b2 mean different tag imformation in different course            #list4列表中的元素代表了某一个特定tag_name在influxdb下检索的信息,在list4中最后的DataFrame数据类型            #数据的结构还是类似 [{'a':[a1,a2]},{'b':[b1,b2]}]的数据结构,a,b代表了不同的班组,a1,a2,b1,b2不同班组不同tag_name在            #influxdb下的索引数据            course_hour_list4 = []            course_day_list4 = []            course_hour_list3.append({course_id : course_hour_list4})            course_day_list3.append({course_id : course_day_list4})            #according to specific course_dial for each course,if the course appear in change part of s_product_out            #search each tag relate to the relevant course and get the min,max from influxdb            #根据s_pro表格的差集,索引每个需要索引的班组。如果该班组出现在s_pro的差集中,就在influxdb中搜索和该班组,以及、            #某能源和该班组有关的tag_name信息,并获取最大最小值等相关数据结构            for each2_course in import_course_dial:                if course_id in each2_course:                    for each3_tagname in each2_course[course_id]:                        sql_hour = """                                 SELECT  MAX(value) as max_V,MIN(value) as min_V FROM """+ meter_table+ """                                  WHERE tag_name='%s' AND type='%s' AND time>'%s' AND time<'%s' AND eid='%s' AND value>0                                 GROUP BY eid, tag_name,time(1h) fill(none) TZ('Asia/Shanghai')""" % (each3_tagname, str(energy_type_name), change_time_in_influxdb_sql(begin_time), change_time_in_influxdb_sql(end_time), eid)                                                sql_day = """                                 SELECT  MAX(value) as max_V,MIN(value) as min_V FROM """+ meter_table+ """                                  WHERE tag_name='%s' AND type='%s' AND time>'%s' AND time<'%s' AND eid='%s' AND value>0                                 GROUP BY eid, tag_name fill(none) TZ('Asia/Shanghai')""" % (each3_tagname, str(energy_type_name), change_time_in_influxdb_sql(begin_time), change_time_in_influxdb_sql(end_time), eid)                                                middle_variable_hour = client.query(sql_hour)                        middle_variable_day = client.query(sql_day)                        #change the date from influxdb to right date type                         #将从influxdb中取到的数据变成合适的数据结构                        middle_hour = middle_add_column_and_change_type(middle_variable_hour,course_id,course_name,course_output_id,course_year,course_month,course_day,'middle_hour')                        middle_day = middle_add_column_and_change_type(middle_variable_day,course_id,course_name,course_output_id,course_year,course_month,course_day,'middle_day')                        #middle_hour['year'] = course_year                        #middle_day['year'] = course_year                        #middle_hour['month'] = course_month                        #middle_day['month'] = course_month                        #middle_hour['day'] = course_day                        #middle_day['day'] = course_day                        #print("this is middle_hour", middle_hour)                                                course_hour_list4.append(middle_hour)                        course_day_list4.append(middle_day)                        #print("we add the tag {} information".format(str(each3_tagname)))        return course_hour_list3, course_day_list3        #get the last_data structure, put the whole date to last_data and the data structure is dataframe    #将一个一个tag_name格式的DataFrame数据格式的数据放到一起,构造最后的数据结构    def construct_last_data_hour(contruct_list):        last_data = pd.DataFrame(None, columns=['dt','max_V','min_V','eid','tag_name','course_id','year','month','day','hour','course_name','course_output_id'])        for each1 in contruct_list:            for key,value in each1.items():                for each2 in value:                    last_data =  last_data.append(each2)        return last_data        def construct_last_data_day(contruct_list):        last_data = pd.DataFrame(None, columns=['dt','max_V','min_V','eid','tag_name','course_id','year','month','day','hour','course_name','course_output_id'])        for each1 in contruct_list:            for key,value in each1.items():                for each2 in value:                    last_data =  last_data.append(each2)        return last_data                #last all operation for update     #判断s_produ的差集是否为空,非空则进行下面的操作    if len(different_s_product) != 0:        #get the last data structure        #通过传入influxdb中查询的表格名,s_produ的差集,输入需要查询的班组和相关表格信息的数据结构,需要查询的tag_name        right_type_data_hour, right_type_data_day = from_different_part_s_product_and_construct_right_type(meter_table,different_s_product, type_course_dial,energy_type_name)        last_append_data_hour = construct_last_data_hour(right_type_data_hour)        last_append_data_day = construct_last_data_day(right_type_data_day)        #get the course_output_id ,year and month for the middle table delete or update        #获取需要更新的中间表格的course_output_id,year和month        #因为s_output一段时间查询一次,所以根据course_output_id,year和month能定位到需要改变的数据        last_append_course_output_id = last_append_data_hour['course_output_id'].unique()        #last_append_course_year = last_append_data_hour['year'].unique()        #last_append_course_month = last_append_data_hour['month'].unique()        #reconnect the local mysql for delete some record of the middle table and update it        #重新连接mysql数据库,来更新中间表                #delete the old record data from middle table through output_id ,year and month        #依据output_id ,year and month来删除中间表中的相关记录        for each in last_append_course_output_id:            try:                connection,cursor = through_environ_connect()            except:                connection,cursor = through_direct_connect()            sql = "DELETE FROM " + table_name_hour + " WHERE course_output_id='%s'" % (each)            cursor.execute(sql)            connection.commit()            cursor.close()            connection.close()                for each in last_append_course_output_id:            try:                connection,cursor = through_environ_connect()            except:                connection,cursor = through_direct_connect()            sql = "DELETE FROM " + table_name_day + " WHERE course_output_id='%s' " % (each)            cursor.execute(sql)            connection.commit()            cursor.close()            connection.close()        #append the update data to the old middle table of         #将需要更新的记录更新到中间表中        #engine = create_engine('mysql+mysqldb://scott:tiger@localhost/foo')        #engine=create_engine("mysql+pymysql://root:poi-t#admin@120.77.86.1:7091/poit_cloud2.0?charset=utf8",echo=True)        try:            engine = create_engine("mysql+pymysql://"+system_variable['MYSQL_USER']+":"+system_variable['MYSQL_PASSWORD']                             +"@"+system_variable['MYSQL_HOST']+":"+str(system_variable['MYSQL_PORT'])+                             "/" + system_variable['MYSQL_DST_DB'] + "?charset=utf8",echo=True)            print("engine environment way")        except:            engine = create_engine("mysql+pymysql://root:poi-t#admin@120.77.86.1:6091/poit_cloud2.0?charset=utf8",echo=True)            print("engine normal local way")                print("this is the type:{}".format(table_type))        print("this is what we append in the mysql hour course_hour table:")        print(last_append_data_hour)        print("this is the number of hour course_hour table:")        print(len(last_append_data_hour))        print("this is what we append in the mysql day course_hour table:")        print(last_append_data_day)        print("this is the number of hour course_day table:")        print(len(last_append_data_day))                                last_append_data_hour.drop_duplicates().to_sql(name=str(table_name_hour), con=engine, if_exists = 'append', index=False, chunksize=1000)        last_append_data_day = last_append_data_day.drop(['dt', 'hour'], axis=1)        last_append_data_day.drop_duplicates().to_sql(name=str(table_name_day), con=engine, if_exists = 'append', index=False, chunksize=1000)                #close the local database        #关闭mysql数据库                        #last operation about the delete part of the s_product_output_course        #最后补充发现删除记录的操作,从中取出s_pro表格取出id,确认为中间表的out_put_id,从而进行删除操作                try:            connection,cursor = through_environ_connect()        except:            connection,cursor = through_direct_connect()        try:            sql = "SELECT * FROM t_product_output_course t WHERE t.id NOT IN (SELECT id FROM " + "`" + system_variable['MYSQL_SRC_DB'] + "`" + "." + "s_product_output_course)"        except:            sql = "SELECT * FROM t_product_output_course t WHERE t.id NOT IN (SELECT id FROM s_product_output_course)"        cursor.execute(sql)        delete_s_product = cursor.fetchall()        cursor.close()        connection.close()        delete_output_id = []        for each in delete_s_product:            delete_output_id.append(each['id'])        try:            connection,cursor = through_environ_connect()        except:            connection,cursor = through_direct_connect()              for each in delete_output_id:            try:                connection,cursor = through_environ_connect()            except:                connection,cursor = through_direct_connect()            sql = "DELETE FROM " + table_name_hour + " WHERE course_output_id='%s'" % (each)            cursor.execute(sql)            connection.commit()            cursor.close()            connection.close()                            for each in delete_output_id:            try:                connection,cursor = through_environ_connect()            except:                connection,cursor = through_direct_connect()            sql = "DELETE FROM " + table_name_day + " WHERE course_output_id='%s'" % (each)            cursor.execute(sql)            connection.commit()            cursor.close()            connection.close()        #table_type = 'electric_formula','gas_formula','steam_formula','hotoil_formula'#table_name = 'm_electric_course_hour','m_hotoil_course_hour','m_steam_course_hour','m_gas_course_hour'#根据更新单个表的程序,构造最后的更新四个表的程序,并且更新四个表之后对当前s_produ表进行记录,并且删除上一时刻记录的s_表,#上一时刻的s_produ表的信息是用t_produc表来记录def all_update_process():    one_update_process('electric_formula', 'stream_electric_course_hour', 'stream_electric_course_day')    one_update_process('gas_formula','stream_gas_course_hour', 'stream_gas_course_day')    one_update_process('steam_formula','stream_steam_course_hour', 'stream_steam_course_day')    one_update_process('hotoil_formula', 'stream_hotoil_course_hour', 'stream_hotoil_course_day')    #delete the old t_product_output and update it    try:        connection,cursor = through_environ_connect()      except:        connection,cursor = through_direct_connect()    sql = "truncate t_product_output_course"    cursor.execute(sql)    connection.commit()    cursor.close()    connection.close()        try:        connection,cursor = through_environ_connect()      except:        connection,cursor = through_direct_connect()        try:        sql = "INSERT INTO t_product_output_course SELECT * FROM " + "`" + system_variable['MYSQL_SRC_DB'] + "`" + "." + "s_product_output_course"    except:        sql = "INSERT INTO t_product_output_course SELECT * FROM s_product_output_course"    cursor.execute(sql)    connection.commit()    #colse the local database and reconnect the local mysql for test    cursor.close()    connection.close()    all_update_process()

原创粉丝点击