python数据分析学习笔记八
来源:互联网 发布:php require use 编辑:程序博客网 时间:2024/04/25 21:56
第八章 应用数据库
1 基于sqlite3的轻量级访问
轻盈的关系型数据库
示例代码如下:
"""Light-weight relational storage with the stdlib sqlite3 module.

Creates an in-memory database, defines a table, inserts and reads back
one row, then drops the table and verifies the database is empty.
"""
import sqlite3

# Open an in-memory database; the with-block commits (or rolls back) for us.
with sqlite3.connect(":memory:") as con:
    # Obtain a cursor.
    c = con.cursor()

    # Create the demo table.
    c.execute('''
        CREATE TABLE sensors(
            data text,
            city text,
            code text,
            sensor_id real,
            temperature real)
    ''')

    # List the tables currently defined in the database.
    for table in c.execute("SELECT name FROM sqlite_master WHERE type='table'"):
        print('Table', table)

    # Insert one record.
    c.execute("INSERT INTO sensors VALUES('2016-02-27','uTRECHT','Red',42,15.14)")

    # Read the record back (bound to a name so it can be checked, then printed).
    c.execute("SELECT * FROM sensors")
    first_row = c.fetchone()
    print(first_row)

    # Drop the table and confirm no tables remain.
    con.execute("DROP TABLE sensors")
    table_count = c.execute(
        "SELECT COUNT (*) FROM sqlite_master WHERE type='table'"
    ).fetchone()[0]
    print("#of table", table_count)

# Explicitly close the connection (the with-block commits but does not close).
con.close()
运行结果如下:
Table ('sensors',)
('2016-02-27', 'uTRECHT', 'Red', 42.0,15.14)
#of table 0
2 通过pandas访问数据库
示例代码如下:
"""Accessing a database through pandas.

Loads the statsmodels sunspots data set into an in-memory SQLite table,
deletes part of it, and reads the remainder back as a DataFrame via
pandas' read_sql.
"""
import sqlite3

import statsmodels.api as sm
from pandas.io.sql import read_sql

# Create the database connection.
with sqlite3.connect(":memory:") as con:
    # Obtain a cursor.
    c = con.cursor()

    # Load the sunspots data set as a DataFrame.
    data_loader = sm.datasets.sunspots.load_pandas()
    df = data_loader.data

    # Convert the DataFrame rows to a list of tuples for executemany.
    rows = [tuple(x) for x in df.values]

    # Create a table without explicit column types (SQLite allows this).
    con.execute("CREATE TABLE sunspots (year,sunactivity)")

    # Bulk-insert all records.
    con.executemany("INSERT INTO sunspots(year,sunactivity) VALUES (?,?)", rows)

    # Total number of records.
    c.execute("SELECT COUNT(*) FROM sunspots")
    print(c.fetchone())

    # Delete records and report how many rows were affected.
    print("Deleted",
          con.execute("DELETE FROM sunspots where sunactivity >20").rowcount,
          "row")

    # read_sql runs a query and returns the result as a DataFrame.
    print(read_sql("SELECT * FROM sunspots where year <1732", con))

    # Clean up.
    con.execute("DROP TABLE sunspots")
    c.close()
运行结果如下:
(309,)
Deleted 217 row
year sunactivity
0 1700.0 5.0
1 1701.0 11.0
2 1702.0 16.0
3 1707.0 20.0
4 1708.0 10.0
5 1709.0 8.0
6 1710.0 3.0
7 1711.0 0.0
8 1712.0 0.0
9 1713.0 2.0
10 1714.0 11.0
11 1723.0 11.0
3 SQLAlchemy
可以把Python中的类映射为数据库中的表,类似于Java中的Hibernate
示例代码如下:
Alchemy_entities.py
"""SQLAlchemy entity definitions (alchemy_entities.py).

Maps Python classes to database tables — similar to Hibernate in the
Java world: Station (an observation station) and Sensor, which carries
a foreign key to its owning station.
"""
from sqlalchemy import Column, ForeignKey, Integer, Float, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship
from sqlalchemy import create_engine
from sqlalchemy import UniqueConstraint

# Declarative base class all mapped entities derive from.
Base = declarative_base()


class Station(Base):
    """An observation station."""
    __tablename__ = 'station'  # table name
    id = Column(Integer, primary_key=True)
    # Station name must be present and unique.
    name = Column(String(14), nullable=False, unique=True)

    def __repr__(self):
        return "Id=%d name=%s" % (self.id, self.name)


class Sensor(Base):
    """A sensor attached to a station."""
    __tablename__ = 'sensor'  # table name
    id = Column(Integer, primary_key=True)
    last = Column(Integer)
    multiplier = Column(Float)
    # Foreign key pointing at the owning station.
    station_id = Column(Integer, ForeignKey('station.id'))
    station = relationship(Station)

    def __repr__(self):
        return "Id=%d last=%d multiplier=%.1f station_id=%d" % (
            self.id, self.last, self.multiplier, self.station_id)


if __name__ == "__main__":
    print("This script is used by another script. Run python alchemy_query.py")
Populate_db.py
"""Populates the demo database (populate_db.py)."""
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from .alchemy_entities import Base, Sensor, Station


def populate(engine):
    """Insert two stations and one sensor through the given engine."""
    # Create a DBSession bound to the engine.
    Base.metadata.bind = engine
    DBSession = sessionmaker(bind=engine)
    session = DBSession()

    # Create two observation stations.
    de_bilt = Station(name='De Bilt')
    session.add(de_bilt)
    session.add(Station(name='Utrecht'))
    session.commit()
    print('Station', de_bilt)

    # Attach a temperature sensor to the first station.
    temp_sensor = Sensor(last=20, multiplier=.1, station=de_bilt)
    session.add(temp_sensor)
    session.commit()
    print("Sensor", temp_sensor)


if __name__ == "__main__":
    print("This script is used by another script. Run python alchemy_query.py")
Alchemy_query.py
"""Queries the demo database via the SQLAlchemy ORM (alchemy_query.py)."""
import os

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from pandas.io.sql import read_sql

from Eight.alchemy_entities import Base, Station, Sensor
from Eight.populate_db import populate

# Create the engine backed by a file-based SQLite database.
engine = create_engine('sqlite:///demo.db')

# Drop and re-create all mapped tables, then fill them with demo data.
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
populate(engine)

# Open a session bound to the engine.
Base.metadata.bind = engine
DBSession = sessionmaker()
DBSession.bind = engine
session = DBSession()

# First row of the station table.
station = session.query(Station).first()

# All stations / all sensors.
print('all station', session.query(Station).all())
print('all sensor', session.query(Sensor).all())

# The (single) sensor belonging to the first station.
print('query sensor by station',
      session.query(Sensor).filter(Sensor.station == station).one())

# Query with pandas' read_sql over the raw DBAPI connection.
print('read_sql all station', read_sql("SELECT * FROM station", engine.raw_connection()))

# Try to remove the database file; on Windows this fails while the
# file is still held open by the engine (WinError 32).
try:
    os.remove('demo.db')
    print('Delete demo.db')
except OSError as e:
    print(e)
运行结果如下:
Station Id=1 name=De Bilt
Sensor Id=1 last=20 multiplier=0.1 station_id=1
all station [Id=1 name=De Bilt, Id=2 name=Utrecht]
all sensor [Id=1 last=20 multiplier=0.1 station_id=1]
query sensor by station Id=1 last=20 multiplier=0.1 station_id=1
read_sql all station id name
0 1 De Bilt
1 2 Utrecht
[WinError 32] 另一个程序正在使用此文件,进程无法访问。: 'demo.db'
4 Pony ORM
Python写的orm包
"""Pony ORM demo: write the sunspots DataFrame into an in-memory SQLite
database managed by Pony and count the stored rows."""
from pony.orm import Database, db_session
from pandas.io.sql import to_sql
import statsmodels.api as sm

# Create the SQLite-backed database.
db = Database('sqlite', ':memory:')

# Load the data and write it to the database inside a Pony session.
with db_session:
    data_loader = sm.datasets.sunspots.load_pandas()
    df = data_loader.data
    # to_sql writes the DataFrame straight into a "sunspots" table.
    to_sql(df, "sunspots", db.get_connection())
    print(db.select("count(*) FROM sunspots"))
运行结果如下:
[309]
5 Dataset 懒人数据库
是sqlalchemy的一个包装器
"""dataset ("the lazy person's database") demo.

dataset is a thin wrapper around SQLAlchemy; tables and schemas are
created on the fly on first insert.
"""
import dataset
from pandas.io.sql import read_sql
from pandas.io.sql import to_sql
import statsmodels.api as sm

# Create the database connection.
db = dataset.connect('sqlite:///:memory:')

# Create the books table (lazily — schema appears on first insert).
table = db["books"]
table.insert(dict(title="Numpy Beginner's guide", author='Ivan Idris'))
table.insert(dict(title="Numpy Cookbook", author='Ivan Idris'))
table.insert(dict(title="Learning Numpy", author='Ivan Idris'))

# Query the data back with pandas' read_sql.
print(read_sql('SELECT * FROM books', db.executable.raw_connection()))

# Load the sunspots data and write it through pandas' to_sql.
data_loader = sm.datasets.sunspots.load_pandas()
df = data_loader.data
to_sql(df, "sunspots", db.executable.raw_connection())
table = db['sunspots']

# Show the first 5 rows.
for row in table.find(_limit=5):
    print(row)

print("Table", db.tables)
运行结果如下:
id author title
0 1 Ivan Idris Numpy Beginner's guide
1 2 Ivan Idris Numpy Cookbook
2 3 Ivan Idris Learning Numpy
OrderedDict([('index', 0), ('YEAR',1700.0), ('SUNACTIVITY', 5.0)])
OrderedDict([('index', 1), ('YEAR',1701.0), ('SUNACTIVITY', 11.0)])
OrderedDict([('index', 2), ('YEAR',1702.0), ('SUNACTIVITY', 16.0)])
OrderedDict([('index', 3), ('YEAR',1703.0), ('SUNACTIVITY', 23.0)])
OrderedDict([('index', 4), ('YEAR',1704.0), ('SUNACTIVITY', 36.0)])
Table ['books', 'sunspots']
Process finished with exit code 0
6 pymongo与mongodb
指定数据存储的目录
mkdir h:/data/db
mongod --dbpath h:/data/db
Python安装mongodb的驱动程序
python -m pip install pymongo
查看当前驱动版本号
C:\Users\Administrator>python -m pip freeze | grep pymongo
pymongo==3.3.1
与mongodb 的测试数据库进行联接
示例代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2017/1/3 11:41
# @Author  : Retacn
# @Site    : connect to the MongoDB test database
# @File    : mongodb_conn.py
# @Software: PyCharm
"""Store the sunspots data set in MongoDB and read it back with pandas."""
__author__ = "retacn"
__copyright__ = "property of mankind."
__license__ = "CN"
__version__ = "0.0.1"
__maintainer__ = "retacn"
__email__ = "zhenhuayue@sina.com"
__status__ = "Development"

import json

from pymongo import MongoClient
import statsmodels.api as sm
import pandas as pd

# Connect to the local MongoDB server and pick the test database.
client = MongoClient()
db = client.test_database

# Turn the DataFrame into a list of JSON documents and store them.
data_loader = sm.datasets.sunspots.load_pandas()
df = data_loader.data
rows = json.loads(df.T.to_json()).values()
# Collection.insert() is deprecated since pymongo 3.0; use insert_many.
db.sunspots.insert_many(list(rows))

# Query everything back and rebuild a DataFrame from the cursor.
cursor = db['sunspots'].find({})
df = pd.DataFrame(list(cursor))
print(df)

# Drop the demo collection again.
db.drop_collection('sunspots')
运行结果如下:
SUNACTIVITY YEAR _id
0 16.6 1825.0 586b24872e29db269c8854f4
1 11.0 1745.0 586b24872e29db269c8854f5
2 4.3 1856.0 586b24872e29db269c8854f6
3 26.1 1921.0 586b24872e29db269c8854f7
4 15.5 1975.0 586b24872e29db269c8854f8
5 47.8 1831.0 586b24872e29db269c8854f9
6 38.0 1955.0 586b24872e29db269c8854fa
7 14.2 1922.0 586b24872e29db269c8854fb
8 37.6 1920.0 586b24872e29db269c8854fc
9 6.4 1797.0 586b24872e29db269c8854fd
10 4.1 1798.0 586b24872e29db269c8854fe
11 28.0 1720.0 586b24872e29db269c8854ff
12 111.2 1871.0 586b24872e29db269c885500
13 83.4 1750.0 586b24872e29db269c885501
14 27.9 1963.0 586b24872e29db269c885502
15 85.9 1761.0 586b24872e29db269c885503
16 32.3 1880.0 586b24872e29db269c885504
17 70.9 1830.0 586b24872e29db269c885505
18 17.9 1985.0 586b24872e29db269c885506
19 8.6 1996.0 586b24872e29db269c885507
20 88.8 1939.0 586b24872e29db269c885508
21 16.3 1866.0 586b24872e29db269c885509
22 22.0 1746.0 586b24872e29db269c88550a
23 1.4 1913.0 586b24872e29db269c88550b
24 5.0 1812.0 586b24872e29db269c88550c
25 14.5 1800.0 586b24872e29db269c88550d
26 84.8 1780.0 586b24872e29db269c88550e
27 138.3 1837.0 586b24872e29db269c88550f
28 54.8 1858.0 586b24872e29db269c885510
29 12.2 1813.0 586b24872e29db269c885511
.. ... ... ...
279 132.0 1787.0 586b24872e29db269c88560b
280 9.6 1944.0 586b24872e29db269c88560c
281 47.8 1752.0 586b24872e29db269c88560d
282 139.0 1870.0 586b24872e29db269c88560e
283 5.7 1911.0 586b24872e29db269c88560f
284 20.6 1854.0 586b24872e29db269c885610
285 121.5 1836.0 586b24872e29db269c885611
286 159.0 1959.0 586b24872e29db269c885612
287 59.1 1862.0 586b24872e29db269c885613
288 64.6 1840.0 586b24872e29db269c885614
289 66.5 1772.0 586b24872e29db269c885615
290 5.0 1902.0 586b24872e29db269c885616
291 82.9 1786.0 586b24872e29db269c885617
292 24.2 1842.0 586b24872e29db269c885618
293 4.4 1954.0 586b24872e29db269c885619
294 95.8 1860.0 586b24872e29db269c88561a
295 109.6 1938.0 586b24872e29db269c88561b
296 21.0 1724.0 586b24872e29db269c88561c
297 5.0 1700.0 586b24872e29db269c88561d
298 63.7 1883.0 586b24872e29db269c88561e
299 25.4 1886.0 586b24872e29db269c88561f
300 12.1 1899.0 586b24872e29db269c885620
301 61.5 1846.0 586b24872e29db269c885621
302 35.6 1891.0 586b24872e29db269c885622
303 79.7 1936.0 586b24872e29db269c885623
304 77.2 1861.0 586b24872e29db269c885624
305 36.3 1826.0 586b24872e29db269c885625
306 6.0 1879.0 586b24872e29db269c885626
307 26.2 1897.0 586b24872e29db269c885627
308 41.0 1794.0 586b24872e29db269c885628
[309 rows x 3 columns]
7 利用redis存储数据
是一个in-memory型的键值数据库,是c写的
示例代码如下:
"""Redis demo: cache the sunspots data set as a JSON blob.

Redis is an in-memory key/value store written in C; here one key holds
the whole DataFrame serialized to JSON.
"""
import redis
import statsmodels.api as sm
import pandas as pd

# Connect to the local Redis server.
r = redis.StrictRedis()

# Load the data set.
data_loader = sm.datasets.sunspots.load_pandas()
df = data_loader.data

# Store the DataFrame as a JSON string under one key.
data = df.T.to_json()
r.set('sunspots', data)

# Retrieve the blob and rebuild the DataFrame.
blob = r.get('sunspots')
print(pd.read_json(blob))
运行结果如下:
0 1 10 100 101 102 103 104 105 \
SUNACTIVITY 5 11 3 14.5 34 45 43.1 47.5 42.2
YEAR 1700 1701 1710 1800.0 1801 1802 1803.0 1804.0 1805.0
106 ... 90 91 92 93 94 95 96 \
SUNACTIVITY 28.1 ... 89.9 66.6 60 46.9 41 21.3 16
YEAR 1806.0 ... 1790.0 1791.0 1792 1793.0 1794 1795.0 1796
97 98 99
SUNACTIVITY 6.4 4.1 6.8
YEAR 1797.0 1798.0 1799.0
8 Apache Cassandra
是结合了键值 和传统关系型数据库特性的混合型数据库,是面向列的数据库
Cassandra安装
1下载apache-cassandra-3.10-bin.tar.gz
2 修改cassandra.bat,添加JAVA_HOME变量
set JAVA_HOME=C:\Java\jdk1.8.0_71
如果本机内存不够大,可以将虚拟机内存调小.默认为2G
-Xms512m^
-Xmx512m^
3 python已安装略
4 修改cqlsh.bat.,设置python路径
set path =D:\Python35
5 添加环境变量
Path=F:\apache-cassandra-3.10\bin;
6 cmd执行cassandra,启动数据库
学习本章时未找到支持python3.5的驱动,所以使用python2.7
示列代码如下:
"""Apache Cassandra demo.

Cassandra is a column-oriented store mixing key/value and relational
traits. The script creates a keyspace and table, bulk-inserts the
sunspots data, counts the rows, then drops the keyspace.
"""
from cassandra import ConsistencyLevel
from cassandra.cluster import Cluster
from cassandra.query import SimpleStatement
import statsmodels.api as sm

# Connect to the cluster and open a session.
cluster = Cluster()
session = cluster.connect()

# A keyspace is the container that holds tables.
session.execute(
    "CREATE KEYSPACE IF NOT EXISTS mykeyspace WITH REPLICATION = { 'class' : 'SimpleStrategy', 'replication_factor' : 1 };")
session.set_keyspace('mykeyspace')

# Create the data table.
session.execute("CREATE TABLE IF NOT EXISTS sunspots (year decimal PRIMARY KEY, sunactivity decimal);")

# Prepared insert with QUORUM consistency.
query = SimpleStatement(
    "INSERT INTO sunspots (year, sunactivity) VALUES (%s, %s)",
    consistency_level=ConsistencyLevel.QUORUM)

# Load the data set and insert it row by row.
data_loader = sm.datasets.sunspots.load_pandas()
df = data_loader.data
rows = [tuple(x) for x in df.values]
for row in rows:
    session.execute(query, row)

# Report the total record count, then clean up.
print(session.execute("SELECT COUNT(*) FROM sunspots")._current_rows[0])
session.execute('DROP KEYSPACE mykeyspace')
cluster.shutdown()
运行结果如下:
Row(count=309)
- python数据分析学习笔记八
- python数据分析学习笔记
- Python数据分析学习笔记一
- Python数据分析学习笔记二
- Python数据分析学习笔记三
- Python数据分析学习笔记四
- Python数据分析学习笔记五
- Python数据分析学习笔记六
- python数据分析入门学习笔记儿
- python数据分析入门学习笔记
- python数据分析入门学习笔记儿
- python数据分析入门学习笔记儿
- python数据分析学习笔记一
- python数据分析学习笔记二
- python数据分析学习笔记三
- python数据分析学习笔记六
- python数据分析入门学习笔记
- # Python数据分析学习笔记(一)
- QPushButton的鼠标事件处理及EventFilter事件过滤器的用法
- 树莓派Debian支持ll
- DapperLambda发布
- python数据分析学习笔记九
- jquery Ajax操作
- python数据分析学习笔记八
- 剖析淘宝 TDDL ( TAOBAO DISTRIBUTE DATA LAYER )
- darwin streaming server 6.0.3 Linux编译
- LNMP添加、删除虚拟主机
- python数据分析学习笔记七
- HTTP、 HTTP1.1、 HTTP/2的区别
- h5中的列表
- 关于在高并发下生成订单号的策略
- [笔记]C++的空类