入手pandas分析统计

来源:互联网 发布:php api接口参数加密 编辑:程序博客网 时间:2024/05/20 18:43

简单的数据统计和分析

前面我们已经完成了基金数据插入到数据表:
http://blog.csdn.net/github_26672553/article/details/78646417

简单的数据统计和分析,我们可以用SQL直接搞定了。

1、查询出NAV前2的数据

# coding: utf-8from sqlalchemy import create_engine,descfrom common.config import dbUrlfrom sqlalchemy.orm import  sessionmakerfrom mappers.FundMapper import FundMapper #基金数据表ORM类if __name__ == "__main__" :    engine = create_engine(dbUrl, echo=True)    # 创建数据库引擎    session = sessionmaker(engine)()    # 数据库会话对象    result = session.query(FundMapper).order_by(desc(FundMapper.NAV)).limit(2).all()    print(result) # [<mappers.FundMapper.FundMapper object at 0x1053e3d30>, <mappers.FundMapper.FundMapper object at 0x1053e3da0>]

2、查询NAV最高的

# coding: utf-8from sqlalchemy import create_engine,desc,funcfrom common.config import dbUrlfrom sqlalchemy.orm import  sessionmakerfrom mappers.FundMapper import FundMapper #基金数据表ORM类if __name__ == "__main__" :    engine = create_engine(dbUrl, echo=True)    # 创建数据库引擎    session = sessionmaker(engine)()    # 数据库会话对象    result = session.query(func.max(FundMapper.NAV)).scalar()   # 获取具体的值    print(result)  # 9.9999 

生成csv文件

读取数据表中的数据,最后生成csv文件

# coding: utf-8from sqlalchemy import create_engine,desc,funcfrom common.config import dbUrlfrom sqlalchemy.orm import  sessionmakerfrom mappers.FundMapper import FundMapper #基金数据表ORM类import csvif __name__ == "__main__" :    engine = create_engine(dbUrl, echo=True)    # 创建数据库引擎    session = sessionmaker(engine)()    # 数据库会话对象    result = session.query(FundMapper).limit(10).all() # 值取10行记录    with open("./csv/fund.csv","w",encoding="utf-8",newline="") as f:        writer = csv.writer(f)        writer.writerow(["code","name","NAV","ACCNAV","fdate","DGR","DGV"]) # 先写表头        for r in result:            writer.writerow([r.code, r.name, r.NAV, r.ACCNAV, r.fdate, r.DGR, r.DGV])        f.close()

知名分析库:pandas

安装:

#当然还是先要cd到虚拟环境目录./python3 -m pip install pandas -i http://mirrors.aliyun.com/pypi/simple/ --trusted-host mirrors.aliyun.com

2、简单使用pandas

    pd = pandas.read_csv("./csv/fund.csv", dtype={"code":pandas.np.str_})    print(pd[0:5]) # 取出第0~5行(不包括第5行)    print(pd[["code","name"]]) # 取出多列    print(pd["code"]) # 取一列
    res = pd.sort_values(by="NAV",ascending=False) # 根据NAV值 倒序    print(res.head(5)) # 取出前5条    print(res.tail(5)) # 取出后5条    # 把取出的结果 保存的一个新的csv文件    res[3:7].to_csv("./csv/res.csv",encoding="utf-8")