整理数据代码
来源:互联网 发布:姚明身体数据 编辑:程序博客网 时间:2024/06/05 20:25
import reimport pandas as pdfrom sqlalchemy import create_engineENGINE_EASY = create_engine( "mysql+pymysql://{}:{}@{}:{}/{}".format('xxx', 'xxx', 'xxx', 4171, 'xxx', ), connect_args={"charset": "utf8"})#初试化数据库连接def fetch_fund_info(fund_id, engine=ENGINE_EASY): if isinstance(fund_id, str): fund_id = [fund_id] fund_id = ','.join(["'{}'".format(ids) for ids in fund_id]) #fund_id基金id号 #如果fund_id是字符串,把他变成列表 #遍历fund_id列表,格式化到“ ”中,如:'1','2','3','4','5' sql = "SELECT fia.fund_id,DATE_FORMAT(fia.statistic_date, '%%Y-%%m-%%d') AS nav_date," \ "fia.fund_name,fia.fund_full_name,fia.type_code_name_3 " \ "AS fund_type_issuance,concat_ws('-',fia.type_code_name_1,fia.stype_code_name_1) AS fund_type_strategy," \ "DATE_FORMAT(fia.foundation_date, '%%Y-%%m-%%d') AS foundation_date," \ "fia.type_code_name_4 AS fund_type_structure,fia.region," \ "fia.fund_status,fia.fund_time_limit,fia.open_date,fia.data_freq,fia.fund_stockbroker," \ "fia.fund_custodian,fia.fee_subscription,fia.expected_return,fia.fee_redeem,fia.fee_manage," \ "fia.fee_trust,DATE_FORMAT(fia.reg_time, '%%Y-%%m-%%d') AS reg_time," \ "fia.fee_pay,mi.user_name AS fund_manager,mi.resume AS manager_info," \ "oi.`profile` AS org_info, fia.org_full_name AS org_name,fas.asset_scale FROM fund_info_aggregation fia " \ "LEFT JOIN fund_manager_mapping fmm ON fmm.fund_id = fia.fund_id " \ "AND fmm.is_leader = 1 AND fmm.is_current = 1 " \ "LEFT JOIN manager_info mi ON fmm.user_id = mi.user_id LEFT JOIN org_info oi ON oi.org_id = fia.org_id " \ "LEFT JOIN fund_asset_scale fas ON fas.fund_id = fia.fund_id AND fas.statistic_date IN " \ "(SELECT min(statistic_date) FROM fund_asset_scale WHERE fund_id = fia.fund_id) " \ "WHERE fia.fund_id in ({})".format(fund_id) data = pd.read_sql(sql, engine) #pandas.read_sql接收两个参数,一个是SQL语句,一个是数据库连接引擎engine,返回一个DataFrame result = {} for fid in set(data['fund_id']):#把data中的fund_id进行去重,fid为去重后的fund_id info_data = data[data['fund_id'] == fid]#把去重后的data数据传给info_data info_data.drop('fund_id', axis=1, inplace=True)#把fund_id那一列去掉并替换 info_data.index = range(len(info_data))#根据info_data的长度对info_data的索引重新排序,如:df.index = range(4) manager_data = info_data[['fund_manager', 'manager_info']]#把fund_manager,manager_info这两列提取出来重新建立一个DataFrame,传给manager_data manager_data.drop_duplicates(subset=['fund_manager'], inplace=True)#把fund_manager这列重复的去掉并代替之前DataFrame org_data = info_data[['org_name', "org_info"]] org_data.drop_duplicates(inplace=True) info_data.drop(['fund_manager', 'manager_info', 'org_name', "org_info"], axis=1, inplace=True) # 把fund_manager,manager_info,org_name,org_info这列去掉并代替之前DataFrame info_data.drop_duplicates(inplace=True) manager_info = [] for i in range(len(manager_data)): manager_name = manager_data['fund_manager'][i] manager_resume = manager_data['manager_info'][i] if manager_name is not None and manager_resume is not None: pattern = manager_name + "\w{0,2}\s*[::,,]\s*" manager_resume = re.sub(pattern, "", manager_resume) manager_info.append({"manager_name": manager_name, "manager_resume": manager_resume}) info_data['manager_info'] = [manager_info] info_data['org_info'] = org_data.to_dict(orient='record') result[fid] = info_data.to_dict(orient='record')[0] #>>> df1.to_dict(orient='records') #[{'col3': 1, 'col4': 1}, {'col3': 2, 'col4': 2}, {'col3': 3, 'col4': 3}, {'col3': 4, 'col4': 4}] #编程以行为一个字典(包含基金经理名字:刘丹) return result
阅读全文
0 0
- 整理数据代码
- 关于数据格式化代码整理
- 代码整理
- 代码整理
- 数据整理
- 整理数据
- 整理数据
- 数据整理
- “R语言实战:机器学习与数据分析”代码发布链接整理
- 内存空间分几部分:代码段、数据段,栈,堆 (收集整理)
- 内存空间分几部分:代码段、数据段,栈,堆 (收集整理)
- 内存空间分几部分:代码段、数据段,栈,堆 (收集整理)
- 内存空间分几部分:代码段、数据段,栈,堆 (收集整理)
- 内存空间分几部分:代码段、数据段,栈,堆 (收集整理)
- 内存空间分几部分:代码段、数据段,栈,堆 (收集整理)
- 数据机构 循环双链表 代码整理 清晰明了 思维整密的算法设计 值得品尝
- 内存空间分几部分:代码段、数据段,栈,堆 (收集整理)
- 内存空间分几部分:代码段、数据段,栈,堆 (收集整理)
- SimpleShader
- Spring Boot : 属性配置&Controller注解补充(四)
- 错误、调试、测试
- 听罗辑思维关于商业模式的思考
- C8051F120单片机双串口配置(uart0,uart1)
- 整理数据代码
- 阿里云服务器CentOS 6.5系统搭建php开发环境(apache+php+mysql)
- PAT乙级1010. 一元多项式求导 (25)
- 利用Hexo + GitHub搭建免费的个人博客
- 浅拷贝和深拷贝
- 微信小程序点击切换图形验证码
- 权限控制
- Android中的MVP框架搭建
- 实现一个函数,可以左旋字符串中的k个字符