Python数据分析学习笔记六
来源:互联网 发布:originlab mac 编辑:程序博客网 时间:2024/03/28 17:22
第六章 数据可视化
1 matplotlib绘图入门
# Plot two straight lines over the same x range: one solid, one dashed.
import matplotlib.pyplot as plt
import numpy as np

# Sample the x axis from 0 to 20 (np.linspace uses 50 points by default).
xs = np.linspace(0, 20)

# First line: y = x + 0.5, drawn with the default solid style.
solid_ys = xs + .5
plt.plot(xs, solid_ys)

# Second line: y = 2x + 1, drawn dashed via the "--" format string.
dashed_ys = 2 * xs + 1
plt.plot(xs, dashed_ys, "--")

# To write the figure to a file, call plt.savefig(<path>) before plt.show().
# Display the figure.
plt.show()
运行结果如下:
2 对数图
示例代码如下:
# Fit an exponential trend to yearly transistor counts and draw the data
# and the fitted curve on a semi-log (log y) plot.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the data file and average all rows that share the same year.
yearly = pd.read_csv("transcount.csv").groupby("year").agg(np.mean)
year_axis = yearly.index.values
transistors = yearly['trans_count'].values

# A degree-1 fit against log(count) is an exponential fit against count.
log_transistors = np.log(transistors)
coeffs = np.polyfit(year_axis, log_transistors, deg=1)
print("Poly", coeffs)

# Evaluate the fitted line and undo the log so it shares the data's scale.
fitted = np.exp(np.polyval(coeffs, year_axis))

plt.semilogy(year_axis, transistors, 'o')
plt.semilogy(year_axis, fitted)
plt.show()
运行结果如下:
3 散点图
示例代码如下:
# Scatter plot of log CPU transistor counts per year; marker colour encodes
# the year and marker size encodes the GPU transistor count.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load both data files and average the rows of each one per year.
cpu_yearly = pd.read_csv("transcount.csv").groupby("year").agg(np.mean)
gpu_yearly = pd.read_csv("gpu_transcount.csv").groupby("year").agg(np.mean)

# Outer-join on the year index so a year present in either file is kept,
# then turn the resulting NaN gaps into 0.
merged = cpu_yearly.merge(gpu_yearly, how='outer',
                          left_index=True, right_index=True)
merged = merged.replace(np.nan, 0)
print(merged)

year_axis = merged.index.values
cpu_counts = merged['trans_count'].values
gpu_sizes = merged['gpu_trans_count'].values

# NOTE(review): a year missing from the CPU file is 0 here, and log(0) is
# -inf -- confirm the CPU data covers every year in the GPU data.
log_cpu = np.log(cpu_counts)

marker_colors = 200 * year_axis
# Minimum area of 20, growing with the GPU count relative to its maximum.
marker_areas = 20 + 200 * gpu_sizes / gpu_sizes.max()

plt.scatter(year_axis, log_cpu,
            c=marker_colors,   # colour
            s=marker_areas,    # size
            alpha=0.5)         # transparency
plt.show()
运行结果如下:
4 图例和注解
# Plot the log-linear fit and a scatter of log CPU transistor counts, with a
# legend, a grid, axis labels, a title and an arrow marking the first GPU year.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load both data files and average the rows of each one per year.
cpu_yearly = pd.read_csv("transcount.csv").groupby('year').agg(np.mean)
gpu_yearly = pd.read_csv('gpu_transcount.csv').groupby('year').agg(np.mean)

# Outer-join on the year index, then replace the NaN gaps with 0.
merged = cpu_yearly.merge(gpu_yearly, how='outer',
                          left_index=True, right_index=True)
merged = merged.replace(np.nan, 0)

# Pull out the columns as plain arrays.
year_axis = merged.index.values
cpu_counts = merged['trans_count'].values
gpu_sizes = merged['gpu_trans_count'].values

# NOTE(review): a year missing from the CPU file is 0 here, and log(0) is
# -inf, which would break the fit -- confirm the CPU data covers every year.
log_cpu = np.log(cpu_counts)

# Fit a straight line to log(count) versus year and draw it.
coeffs = np.polyfit(year_axis, log_cpu, deg=1)
plt.plot(year_axis, np.polyval(coeffs, year_axis), label='Fit')

# The annotation points at the earliest year present in the GPU data.
first_gpu_year = gpu_yearly.index.values.min()
arrow_tip_y = np.log(merged.at[first_gpu_year, 'trans_count'])
note_text = 'First GPU\n %d' % first_gpu_year

# Place the text offset from the arrow tip (offset given in points).
plt.annotate(
    note_text,
    xy=(first_gpu_year, arrow_tip_y),
    arrowprops={'arrowstyle': '->'},
    xytext=(-30, +70),
    textcoords="offset points",
)

# Scatter plot: colour by year, size by GPU count relative to its maximum.
marker_areas = 20 + 200 * gpu_sizes / gpu_sizes.max()
plt.scatter(
    year_axis,
    log_cpu,
    c=200 * year_axis,
    s=marker_areas,
    alpha=0.5,
    label="Scatter Plot",
)

plt.legend(loc="upper left")

# Turn on the grid.
plt.grid()

# Axis labels (the y label uses a larger font) and title.
plt.xlabel('Year')
plt.ylabel("Log Transistor Counts", fontsize=16)
plt.title("Moore's Law & Transistor Counts")

plt.show()
运行结果如下:
5 三维图
# 3-D surface of log GPU transistor counts over year and log CPU transistor
# counts.
#
# Two changes from the original script, both needed on current Matplotlib:
#   * The axes are created with fig.add_subplot(projection="3d"); a bare
#     Axes3D(fig) no longer attaches itself to the figure, which left the
#     window empty.
#   * Z is expanded to a 2-D array explicitly, because plot_surface rejects
#     a 1-D Z.
# Importing Axes3D registers the "3d" projection on older Matplotlib versions.
from mpl_toolkits.mplot3d.axes3d import Axes3D  # noqa: F401
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load both data files and average the rows of each one per year.
df = pd.read_csv("transcount.csv")
df = df.groupby('year').mean()
gpu = pd.read_csv("gpu_transcount.csv")
gpu = gpu.groupby('year').mean()

# Outer-join the two tables on the year index and replace NaN gaps with 0.
df = pd.merge(df, gpu, how='outer', left_index=True, right_index=True)
df = df.replace(np.nan, 0)

# The figure is the container that holds the plot; attach 3-D axes to it.
fig = plt.figure()
ax = fig.add_subplot(projection='3d')

# NOTE(review): counts that are 0 after the NaN replacement give log(0) =
# -inf below -- confirm how years without data should be drawn.
X = df.index.values
Y = np.log(df['trans_count'].values)

# Build the coordinate matrices; both have shape (len(Y), len(X)).
X, Y = np.meshgrid(X, Y)

# One GPU value per year, repeated down every row so Z matches X and Y
# (the same layout the old implicit broadcasting of a 1-D Z produced).
gpu_log = np.log(df['gpu_trans_count'].values)
Z = np.tile(gpu_log, (X.shape[0], 1))

# Draw the surface.
ax.plot_surface(X, Y, Z)

# Axis labels.
ax.set_xlabel('Year')
ax.set_ylabel('Log CPU transistor counts')
ax.set_zlabel('Log GPU transistor counts')

# Title.
ax.set_title("Moore's Law & Transistor Counts")
plt.show()
运行结果如下:
6 pandas绘图
示例代码如下:
# Use the DataFrame plotting helpers: a plain line plot, a semi-log plot and
# a log-log scatter of GPU versus CPU transistor counts.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load both data files and average the rows of each one per year.
cpu_yearly = pd.read_csv("transcount.csv").groupby('year').agg(np.mean)
gpu_yearly = pd.read_csv("gpu_transcount.csv").groupby('year').agg(np.mean)

# Outer-join the two tables on the year index and replace NaN gaps with 0.
combined = cpu_yearly.merge(gpu_yearly, how='outer',
                            left_index=True, right_index=True)
combined = combined.replace(np.nan, 0)

# Line plot of every column against the year index.
combined.plot()

# Same plot with a logarithmic y axis.
combined.plot(logy=True)

# Scatter plot restricted to rows that actually have a GPU count.
has_gpu = combined['gpu_trans_count'] > 0
combined[has_gpu].plot(
    kind='scatter',        # plot type
    x='trans_count',
    y='gpu_trans_count',
    loglog=True,           # logarithmic scale on both axes
)
plt.show()
运行结果如下:
7 时滞图
# Lag plot of the log CPU transistor counts.
#
# lag_plot is imported from pandas.plotting: the pandas.tools.plotting module
# used by the original script has been removed from pandas, so the old import
# raises ImportError.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.plotting import lag_plot

# Load both data files and average the rows of each one per year.
df = pd.read_csv("transcount.csv")
df = df.groupby('year').mean()
gpu = pd.read_csv("gpu_transcount.csv")
gpu = gpu.groupby('year').mean()

# Outer-join the two tables on the year index and replace NaN gaps with 0.
df = pd.merge(df, gpu, how='outer', left_index=True, right_index=True)
df = df.replace(np.nan, 0)

# Plot each log count against the value that follows it in the series.
lag_plot(np.log(df['trans_count']))
plt.show()
8 自相关图
# Autocorrelation plot of the log CPU transistor counts.
#
# autocorrelation_plot is imported from pandas.plotting: the
# pandas.tools.plotting module used by the original script has been removed
# from pandas, so the old import raises ImportError.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas.plotting import autocorrelation_plot

# Load both data files and average the rows of each one per year.
df = pd.read_csv("transcount.csv")
df = df.groupby('year').mean()
gpu = pd.read_csv("gpu_transcount.csv")
gpu = gpu.groupby('year').mean()

# Outer-join the two tables on the year index and replace NaN gaps with 0.
df = pd.merge(df, gpu, how='outer', left_index=True, right_index=True)
df = df.replace(np.nan, 0)

# Draw the autocorrelation of the log counts.
autocorrelation_plot(np.log(df['trans_count']))
plt.show()
9 plot.ly
本节示例尚未测试。
0 0
- Python数据分析学习笔记六
- python数据分析学习笔记六
- 《利用python进行数据分析》学习笔记(六)
- Python学习笔记(六):数据可视化
- python数据分析学习笔记
- Python学习笔记:六
- Python数据分析学习笔记一
- Python数据分析学习笔记二
- Python数据分析学习笔记三
- Python数据分析学习笔记四
- Python数据分析学习笔记五
- python数据分析入门学习笔记儿
- python数据分析入门学习笔记
- python数据分析入门学习笔记儿
- python数据分析入门学习笔记儿
- python数据分析学习笔记一
- python数据分析学习笔记二
- python数据分析学习笔记三
- [Leetcode] #20 Valid Parentheses
- 准备研究netty源码
- mybatis入门-第一个程序
- Java:单例模式的七种写法
- c语言关键字volative的作用
- python数据分析学习笔记六
- 查看被占用端口
- vs2013 system.web.mvc未能加载原因(已解决)
- Git 使用记录
- 【干货】支付宝App架构揭秘—使用的开源组件总结!
- OpenVPN下载、安装、配置及使用详解
- 选择算法伪代码,C代码,以及分析
- java异常体系、自定义异常
- Windows Server 2012 R2 或 2016 无法安装 .Net 3.5.1