实习程序1
来源:互联网 发布:淘宝关键词重复有用吗 编辑:程序博客网 时间:2024/06/15 19:09
# coding: utf-8# In[29]:#import the necessary modulesimport pymysql as mysqldbimport pandas as pdimport pymysql.cursorsimport astimport influxdbimport datetimeimport osfrom influxdb import InfluxDBClientfrom influxdb import DataFrameClientfrom sqlalchemy import create_engine# -*- coding: utf-8 -*-#encoding=utf-8# In[30]:#client = DataFrameClient(host=system_variable['INFLUXDB_HOST'], port=int(system_variable['INFLUXDB_PORT']),username=system_variable['INFLUXDB_USER'], password=system_variable['INFLUXDB_PASSWORD'], database='original_data_db')#There are four types of input for table_type :'electric_formula','gas_formula','steam_formula','hotoil_formula'#There are four types of input for table_name :'m_electric_course_hour','m_hotoil_course_hour','m_steam_course_hour,'m_gas_course_hour'#输入变量table_type有四种情况,分别是'electric_formula','gas_formula','steam_formula','hotoil_formula',分别代表四种能源的情况#输入变量table_name有四种情况,分别是'm_electric_course_hour','m_hotoil_course_hour','m_steam_course_hour,'m_gas_course_hour',也分别代表四种能源的情况#print the system_variable to observe#打印出环境变量查看起是否存在system_variable = os.environ#write_db = system_variable['MYSQL_DST_DB']#read_db = system_variable['MYSQL_SRC_DB']print(system_variable)#print(system_variable)#print("system_variable['MYSQL_HOST'] : "+system_variable['MYSQL_HOST'])#print("system_variable['MYSQL_PORT'] : "+system_variable['MYSQL_PORT'])#print("system_variable['MYSQL_USER'] : "+system_variable['MYSQL_USER'])#print("system_variable['MYSQL_PASSWORD'] : "+system_variable['MYSQL_PASSWORD'])#print("system_variable['INFLUXDB_HOST'] : "+system_variable['INFLUXDB_HOST'])#print("system_variable['INFLUXDB_PORT'] : "+system_variable['INFLUXDB_PORT'])#print("system_variable['INFLUXDB_USER'] : "+system_variable['INFLUXDB_USER'])#print("system_variable['INFLUXDB_PASSWORD'] : "+system_variable['INFLUXDB_PASSWORD'])#define the connect function to connect the mysql through system_variable#定义通过环境变量连接Mysql的函数def through_environ_connect(): connection = pymysql.connect(host=system_variable['MYSQL_HOST'], port=int(system_variable['MYSQL_PORT']), user=system_variable['MYSQL_USER'], password=system_variable['MYSQL_PASSWORD'], db=system_variable['MYSQL_DST_DB'], charset='utf8', cursorclass=pymysql.cursors.DictCursor) cursor = connection.cursor() print("environment way") return connection,cursor#define the connect functionto connect the mysql through specific address#定义通过特定地址访问Mysql的函数def through_direct_connect(): connection = pymysql.connect(host='120.77.86.1', port=6091, user='root', password='poi-t#admin', db='poit_cloud2.0', charset='utf8', cursorclass=pymysql.cursors.DictCursor) cursor = connection.cursor() print("normal local way") return connection,cursor#It's a function for updating one middle table#这个函数用于更新某一个特定的中间表,根据输入确定要更新的中间表def one_update_process(table_type,table_name_hour,table_name_day): # Connect to the mysql database # 连接mysql数据库 try: connection,cursor = through_environ_connect() except: connection,cursor = through_direct_connect() #get the e_course table #获得e_course表格 try: sql = "select * from " + "`" + system_variable['MYSQL_SRC_DB'] + "`" + "." + "e_course" except: sql = "select * from e_course" cursor.execute(sql) e_course = cursor.fetchall() #close the mysql database #关闭mysql数据库 cursor.close() connection.close() #It's a function for extracting variables from an equation #定义抽取公式中变量的函数 def extract_variable(equation): names = [ node.id for node in ast.walk(ast.parse(str(equation))) if isinstance(node, ast.Name) ] return names #It's a function for creating the {course:course_dial} datastructure, it looks like this [{'a':[a1,a2]},{'b':[b1,b2]}] # the a,b is the course_id, the a1,a2 or b1,b2 are the course_dial which connect with relative course_id #函数用于构造{course:course_dial}类型的数据结构,改数据结构类似于 [{'a':[a1,a2]},{'b':[b1,b2]}],a,b是代表了班组代号, #a1,a2或者b1,b2是相关班组的表格信息,即a1中存取了班组a中,表格a1的读数信息。 #输入e_course_data表示 def create_course_dial(e_course_data,formual='e_formula'): formula = [] course_id = [] [formula.append(each[str(formual)]) for each in e_course_data] [course_id.append(each['id']) for each in e_course_data] middle_process = dict(zip(course_id, formula)) course_dial = [] for key, value in middle_process.items(): course_dial.append({key:extract_variable(value)}) return course_dial #table_type = 'electric_formula','gas_formula','steam_formula','hotoil_formula' #create different relationship between course and dial in different energy type #获得班组与相关所有表格的信息,course_dial,同时获取班组与需要更新表格能源类型相关的表格信息。type_course_dial, #比如,若本次更新电相关的信息,则type_course_dial只构造该班组和电有关的表格的数据结构 #course_dial = create_course_dial(e_course) type_course_dial = create_course_dial(e_course,formual=str(table_type)) #connect to mysql #连接Mysql数据库 try: connection,cursor = through_environ_connect() except: connection,cursor = through_direct_connect() #connect to Influxdb #连接 Influxdb 数据库 try: client = DataFrameClient(host=system_variable['INFLUXDB_HOST'], port=int(system_variable['INFLUXDB_PORT']) ,username=system_variable['INFLUXDB_USER'], password=system_variable['INFLUXDB_PASSWORD'], database='original_data_db') print("Influxdb environment way") except: client = DataFrameClient(host='120.77.86.1', port=6093, username='root', password='poi-t#admin', database='original_data_db') print("Influxdb normal local way") # get the different part of two time of s_output_product table,t_output_product is the s_output_product last time # 获得s_output_product table,t_output_product的差集部分,t_output_product表示上一时刻的s_output_product表格 try: sql = """ SELECT 's_product_output_course' AS 'set', s.* FROM """ + "`" + system_variable['MYSQL_SRC_DB'] + "`" + "." + """s_product_output_course s WHERE ROW(s.eid,s.tid,s.id,s.type,s.output,s.year,s.month,s.day,s.course_name,s.begin_time,s.end_time) NOT IN ( SELECT * FROM t_product_output_course ) UNION ALL SELECT 't_product_output_course' AS 'set', s1.* FROM t_product_output_course s1 WHERE ROW(s1.eid,s1.tid,s1.id,s1.type,s1.output,s1.year,s1.month,s1.day,s1.course_name,s1.begin_time,s1.end_time) NOT IN ( SELECT * FROM """ + "`" + system_variable['MYSQL_SRC_DB'] + "`" + "." + """s_product_output_course ) """ except: sql = """ SELECT 's_product_output_course' AS 'set', s.* FROM s_product_output_course s WHERE ROW(s.eid,s.tid,s.id,s.type,s.output,s.year,s.month,s.day,s.course_name,s.begin_time,s.end_time) NOT IN ( SELECT * FROM t_product_output_course ) UNION ALL SELECT 't_product_output_course' AS 'set', s1.* FROM t_product_output_course s1 WHERE ROW(s1.eid,s1.tid,s1.id,s1.type,s1.output,s1.year,s1.month,s1.day,s1.course_name,s1.begin_time,s1.end_time) NOT IN ( SELECT * FROM s_product_output_course ) """ #get the change part of the s_product_output_course #得到差集部分 cursor.execute(sql) different_s_product = cursor.fetchall() #close the local database #关闭mysql数据库 cursor.close() connection.close() #print("this is the change part of s_product_output_course:") #print(different_s_product) #It's a function for changing the timestamp to right type and add year,month,day,hour columns #这个函数用于将时间数据转换为正确的格式,并且增加年,月和日的列,用于与中间表的格式相对应,输入是DataFrame返回格式为DataFrame def change_to_right_type(DataFrame,course_year,course_month,course_day,judge_day_or_hour): DataFrame = DataFrame.reset_index() DataFrame = DataFrame.rename(columns={'index':'dt'}) course_year = course_year course_month = course_month course_day = course_day judge_day_or_hour = judge_day_or_hour def change_right_type(time): return str(time).rsplit('+')[0] DataFrame['dt'] = DataFrame['dt'].apply(change_right_type) def convert_str_to_datetime(time_str): #return datetime.datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S") #放回时间加8个小时 try: time_str = datetime.datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S") except: time_str = time_str.rsplit('.')[0] time_str = datetime.datetime.strptime(time_str, "%Y-%m-%d %H:%M:%S") time_str = time_str + datetime.timedelta(hours=8) return time_str DataFrame['dt'] = DataFrame['dt'].apply(convert_str_to_datetime) def get_the_year(time_str): return time_str.year def get_the_month(time_str): return time_str.month def get_the_day(time_str): return time_str.day def get_the_hour(time_str): return time_str.hour if judge_day_or_hour == 'middle_day': DataFrame['year'] = course_year DataFrame['month'] = course_month DataFrame['day'] = course_day DataFrame['hour'] = DataFrame['dt'].apply(get_the_hour) else: DataFrame['year'] = DataFrame['dt'].apply(get_the_year) DataFrame['month'] = DataFrame['dt'].apply(get_the_month) DataFrame['day'] = DataFrame['dt'].apply(get_the_day) DataFrame['hour'] = DataFrame['dt'].apply(get_the_hour) return DataFrame #middle process for the data,because the date get from the influxdb need some handling,and add some columns #so the dataframe can like the middle table #数据处理的中间过程,因为在Influxdb中group by获得的数据需要一定的处理。获取相关的表格信息,企业信息等。 #输入query_list是从inlufxdb中group by出来的数据,course_id,course_name如名所示,output_id表示的一种标识,其在s_output表中是 #主键id,唯一标识。在中间表中表示为course_output_id,即通过s_表的改变,在中间表中发生了改变的条目,加上对应关系。 def middle_add_column_and_change_type(query_list,course_id,course_name,course_output_id,course_year,course_month,course_day,judge_day_or_hour): course_year = course_year course_month = course_month course_day = course_day judge_day_or_hour = judge_day_or_hour for key1,value1 in query_list.items(): add_eid = key1[1][0][1] add_tag_name = key1[1][1][1] add_course_id = key1 #获得的是索引的表名,比如:'s_electric_meter_data' value1['eid'] = add_eid value1['tag_name'] = add_tag_name value1['course_id'] = course_id value1['course_output_id'] = course_output_id value1['course_name'] = course_name return change_to_right_type(value1,course_year,course_month,course_day,judge_day_or_hour) #table_type = 'electric_formula','gas_formula','steam_formula','hotoil_formula' #decide the key words or table name query from influxdb depend on table_type #根据输入来确定在influxdb中索引的表名和需要查找的tag_name if table_type == 'electric_formula': energy_type_name = 'E' meter_table = "s_electric_meter_data" if table_type == 'gas_formula': energy_type_name = 'TR_LJ' meter_table = "s_gas_meter_data" if table_type == 'steam_formula': energy_type_name = 'F_TTL' meter_table = "s_steam_meter_data" if table_type == 'hotoil_formula': energy_type_name = 'HEAT_FLUX_TTL' meter_table = "s_hotoil_meter_data" def change_time_in_influxdb_sql(time): time = str(time) time = time.split(' ')[0] + 'T'+time.split(' ')[1] + '+08:00' return time #construct the different part of s_product_output data #从s_product_的差集中来构造所需的数据 #输入meter_table代表influxdb中的表名,change_part_of_s_product表示s_product中的差集部分 #import_course_dial表示和相关能源相关的表格信息,energy_type_name表示在influxdb中需要查找的tag_name def from_different_part_s_product_and_construct_right_type(meter_table,change_part_of_s_product,import_course_dial,energy_type_name): course_hour_list3 = [] course_day_list3 = [] # for each change record in s_product_output do the next operation to get the datastructure # 根据s_product_out差集中的每条记录构造需要的数据结构 for each1_change_record in change_part_of_s_product: eid = each1_change_record['eid'] course_id = each1_change_record['tid'] begin_time = each1_change_record['begin_time'] end_time = each1_change_record['end_time'] course_output_id = each1_change_record['id'] course_name = str(each1_change_record['course_name']) course_year = each1_change_record['year'] course_month = each1_change_record['month'] course_day = each1_change_record['day'] #print("This is the course {}".format(course_id)) #print(course_name) #the element in list4 is the information about one specific tag,so list3 is the last whole data #the data structure like this [{'a':[a1,a2]},{'b':[b1,b2]}],a,b mean different course, #a1,a2,b1,b2 mean different tag imformation in different course #list4列表中的元素代表了某一个特定tag_name在influxdb下检索的信息,在list4中最后的DataFrame数据类型 #数据的结构还是类似 [{'a':[a1,a2]},{'b':[b1,b2]}]的数据结构,a,b代表了不同的班组,a1,a2,b1,b2不同班组不同tag_name在 #influxdb下的索引数据 course_hour_list4 = [] course_day_list4 = [] course_hour_list3.append({course_id : course_hour_list4}) course_day_list3.append({course_id : course_day_list4}) #according to specific course_dial for each course,if the course appear in change part of s_product_out #search each tag relate to the relevant course and get the min,max from influxdb #根据s_pro表格的差集,索引每个需要索引的班组。如果该班组出现在s_pro的差集中,就在influxdb中搜索和该班组,以及、 #某能源和该班组有关的tag_name信息,并获取最大最小值等相关数据结构 for each2_course in import_course_dial: if course_id in each2_course: for each3_tagname in each2_course[course_id]: sql_hour = """ SELECT MAX(value) as max_V,MIN(value) as min_V FROM """+ meter_table+ """ WHERE tag_name='%s' AND type='%s' AND time>'%s' AND time<'%s' AND eid='%s' AND value>0 GROUP BY eid, tag_name,time(1h) fill(none) TZ('Asia/Shanghai')""" % (each3_tagname, str(energy_type_name), change_time_in_influxdb_sql(begin_time), change_time_in_influxdb_sql(end_time), eid) sql_day = """ SELECT MAX(value) as max_V,MIN(value) as min_V FROM """+ meter_table+ """ WHERE tag_name='%s' AND type='%s' AND time>'%s' AND time<'%s' AND eid='%s' AND value>0 GROUP BY eid, tag_name fill(none) TZ('Asia/Shanghai')""" % (each3_tagname, str(energy_type_name), change_time_in_influxdb_sql(begin_time), change_time_in_influxdb_sql(end_time), eid) middle_variable_hour = client.query(sql_hour) middle_variable_day = client.query(sql_day) #change the date from influxdb to right date type #将从influxdb中取到的数据变成合适的数据结构 middle_hour = middle_add_column_and_change_type(middle_variable_hour,course_id,course_name,course_output_id,course_year,course_month,course_day,'middle_hour') middle_day = middle_add_column_and_change_type(middle_variable_day,course_id,course_name,course_output_id,course_year,course_month,course_day,'middle_day') #middle_hour['year'] = course_year #middle_day['year'] = course_year #middle_hour['month'] = course_month #middle_day['month'] = course_month #middle_hour['day'] = course_day #middle_day['day'] = course_day #print("this is middle_hour", middle_hour) course_hour_list4.append(middle_hour) course_day_list4.append(middle_day) #print("we add the tag {} information".format(str(each3_tagname))) return course_hour_list3, course_day_list3 #get the last_data structure, put the whole date to last_data and the data structure is dataframe #将一个一个tag_name格式的DataFrame数据格式的数据放到一起,构造最后的数据结构 def construct_last_data_hour(contruct_list): last_data = pd.DataFrame(None, columns=['dt','max_V','min_V','eid','tag_name','course_id','year','month','day','hour','course_name','course_output_id']) for each1 in contruct_list: for key,value in each1.items(): for each2 in value: last_data = last_data.append(each2) return last_data def construct_last_data_day(contruct_list): last_data = pd.DataFrame(None, columns=['dt','max_V','min_V','eid','tag_name','course_id','year','month','day','hour','course_name','course_output_id']) for each1 in contruct_list: for key,value in each1.items(): for each2 in value: last_data = last_data.append(each2) return last_data #last all operation for update #判断s_produ的差集是否为空,非空则进行下面的操作 if len(different_s_product) != 0: #get the last data structure #通过传入influxdb中查询的表格名,s_produ的差集,输入需要查询的班组和相关表格信息的数据结构,需要查询的tag_name right_type_data_hour, right_type_data_day = from_different_part_s_product_and_construct_right_type(meter_table,different_s_product, type_course_dial,energy_type_name) last_append_data_hour = construct_last_data_hour(right_type_data_hour) last_append_data_day = construct_last_data_day(right_type_data_day) #get the course_output_id ,year and month for the middle table delete or update #获取需要更新的中间表格的course_output_id,year和month #因为s_output一段时间查询一次,所以根据course_output_id,year和month能定位到需要改变的数据 last_append_course_output_id = last_append_data_hour['course_output_id'].unique() #last_append_course_year = last_append_data_hour['year'].unique() #last_append_course_month = last_append_data_hour['month'].unique() #reconnect the local mysql for delete some record of the middle table and update it #重新连接mysql数据库,来更新中间表 #delete the old record data from middle table through output_id ,year and month #依据output_id ,year and month来删除中间表中的相关记录 for each in last_append_course_output_id: try: connection,cursor = through_environ_connect() except: connection,cursor = through_direct_connect() sql = "DELETE FROM " + table_name_hour + " WHERE course_output_id='%s'" % (each) cursor.execute(sql) connection.commit() cursor.close() connection.close() for each in last_append_course_output_id: try: connection,cursor = through_environ_connect() except: connection,cursor = through_direct_connect() sql = "DELETE FROM " + table_name_day + " WHERE course_output_id='%s' " % (each) cursor.execute(sql) connection.commit() cursor.close() connection.close() #append the update data to the old middle table of #将需要更新的记录更新到中间表中 #engine = create_engine('mysql+mysqldb://scott:tiger@localhost/foo') #engine=create_engine("mysql+pymysql://root:poi-t#admin@120.77.86.1:7091/poit_cloud2.0?charset=utf8",echo=True) try: engine = create_engine("mysql+pymysql://"+system_variable['MYSQL_USER']+":"+system_variable['MYSQL_PASSWORD'] +"@"+system_variable['MYSQL_HOST']+":"+str(system_variable['MYSQL_PORT'])+ "/" + system_variable['MYSQL_DST_DB'] + "?charset=utf8",echo=True) print("engine environment way") except: engine = create_engine("mysql+pymysql://root:poi-t#admin@120.77.86.1:6091/poit_cloud2.0?charset=utf8",echo=True) print("engine normal local way") print("this is the type:{}".format(table_type)) print("this is what we append in the mysql hour course_hour table:") print(last_append_data_hour) print("this is the number of hour course_hour table:") print(len(last_append_data_hour)) print("this is what we append in the mysql day course_hour table:") print(last_append_data_day) print("this is the number of hour course_day table:") print(len(last_append_data_day)) last_append_data_hour.drop_duplicates().to_sql(name=str(table_name_hour), con=engine, if_exists = 'append', index=False, chunksize=1000) last_append_data_day = last_append_data_day.drop(['dt', 'hour'], axis=1) last_append_data_day.drop_duplicates().to_sql(name=str(table_name_day), con=engine, if_exists = 'append', index=False, chunksize=1000) #close the local database #关闭mysql数据库 #last operation about the delete part of the s_product_output_course #最后补充发现删除记录的操作,从中取出s_pro表格取出id,确认为中间表的out_put_id,从而进行删除操作 try: connection,cursor = through_environ_connect() except: connection,cursor = through_direct_connect() try: sql = "SELECT * FROM t_product_output_course t WHERE t.id NOT IN (SELECT id FROM " + "`" + system_variable['MYSQL_SRC_DB'] + "`" + "." + "s_product_output_course)" except: sql = "SELECT * FROM t_product_output_course t WHERE t.id NOT IN (SELECT id FROM s_product_output_course)" cursor.execute(sql) delete_s_product = cursor.fetchall() cursor.close() connection.close() delete_output_id = [] for each in delete_s_product: delete_output_id.append(each['id']) try: connection,cursor = through_environ_connect() except: connection,cursor = through_direct_connect() for each in delete_output_id: try: connection,cursor = through_environ_connect() except: connection,cursor = through_direct_connect() sql = "DELETE FROM " + table_name_hour + " WHERE course_output_id='%s'" % (each) cursor.execute(sql) connection.commit() cursor.close() connection.close() for each in delete_output_id: try: connection,cursor = through_environ_connect() except: connection,cursor = through_direct_connect() sql = "DELETE FROM " + table_name_day + " WHERE course_output_id='%s'" % (each) cursor.execute(sql) connection.commit() cursor.close() connection.close() #table_type = 'electric_formula','gas_formula','steam_formula','hotoil_formula'#table_name = 'm_electric_course_hour','m_hotoil_course_hour','m_steam_course_hour','m_gas_course_hour'#根据更新单个表的程序,构造最后的更新四个表的程序,并且更新四个表之后对当前s_produ表进行记录,并且删除上一时刻记录的s_表,#上一时刻的s_produ表的信息是用t_produc表来记录def all_update_process(): one_update_process('electric_formula', 'stream_electric_course_hour', 'stream_electric_course_day') one_update_process('gas_formula','stream_gas_course_hour', 'stream_gas_course_day') one_update_process('steam_formula','stream_steam_course_hour', 'stream_steam_course_day') one_update_process('hotoil_formula', 'stream_hotoil_course_hour', 'stream_hotoil_course_day') #delete the old t_product_output and update it try: connection,cursor = through_environ_connect() except: connection,cursor = through_direct_connect() sql = "truncate t_product_output_course" cursor.execute(sql) connection.commit() cursor.close() connection.close() try: connection,cursor = through_environ_connect() except: connection,cursor = through_direct_connect() try: sql = "INSERT INTO t_product_output_course SELECT * FROM " + "`" + system_variable['MYSQL_SRC_DB'] + "`" + "." + "s_product_output_course" except: sql = "INSERT INTO t_product_output_course SELECT * FROM s_product_output_course" cursor.execute(sql) connection.commit() #colse the local database and reconnect the local mysql for test cursor.close() connection.close() all_update_process()
阅读全文
0 0
- 实习程序1
- 程序菜鸟的实习日记1
- linux实习 数据管理程序(1)
- 实习程序收藏
- android实习程序
- android实习程序2
- android实习程序3
- 编程实习程序
- 实习程序2
- 实习程序3
- 实习程序4
- 实习程序5
- 实习程序6
- 实习 1
- 实习1
- 菜鸟程序实习日记2
- 程序设计实习MOOC / 继承和派生——编程作业 第五周程序填空题1
- 我的程序人生之实习生活
- ios 枚举值 位移表示的好处
- 课时18 Linux网络基础配置
- Angular vs React 最全面深入对比
- LeetCode
- python 文本写入及文本替换练习
- 实习程序1
- 关于jQuery load()方法加载页面后台发生异常而前台页面加载失败且没有任何展示信息的问题处理
- 密码发生器--蓝桥杯java组历年真题
- oracle学习
- 单例模式
- Greendao3.0的A级使用(入门级)
- 纯HTML5 APP与原生APP的差距在哪?
- AndroidStudio取色器
- GameObject.FindGameObjectsWithTag