Python 数据分析学习笔记(六)

来源:互联网 发布:originlab mac 编辑:程序博客网 时间:2024/03/28 17:22

 第六章 数据可视化

1 matplotlib绘图入门

# Minimal matplotlib example: draw two straight lines on one figure.
import matplotlib.pyplot as plt
import numpy as np

# Sample the x axis between the start point 0 and the end point 20.
x = np.linspace(0, 20)

# First line, y = 0.5 + x, drawn with the default (solid) style.
plt.plot(x, 0.5 + x)
# Second line, y = 1 + 2x, drawn dashed.
plt.plot(x, 1 + 2 * x, "--")

# To save the figure to a file, call plt.savefig(<path>) before plt.show().

# Display the figure.
plt.show()

 

运行结果如下:

 

 

2 对数图

示例代码如下:

# Semi-log plot of transistor counts per year with a fitted exponential trend.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the data file into a DataFrame and average the rows of each year.
# .mean() is used instead of .aggregate(np.mean): same result, but recent
# pandas versions emit a FutureWarning when a NumPy callable is passed.
df = pd.read_csv("transcount.csv")
df = df.groupby("year").mean()

years = df.index.values
counts = df['trans_count'].values

# Fit a first-degree polynomial to log(counts) as a function of the year.
poly = np.polyfit(years, np.log(counts), deg=1)
print("Poly", poly)

# Raw data as dots, fitted trend as a line, both on a logarithmic y axis.
plt.semilogy(years, counts, 'o')
plt.semilogy(years, np.exp(np.polyval(poly, years)))
plt.show()

 

运行结果如下:

 

3 散点图

示例代码如下:

# Scatter (bubble) plot: year vs. log CPU transistor count, with the bubble
# size driven by the GPU transistor count.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load both data files and average the rows of each year.
# .mean() replaces .aggregate(np.mean): same result, no FutureWarning on
# recent pandas versions.
df = pd.read_csv("transcount.csv")
df = df.groupby("year").mean()
gpu = pd.read_csv("gpu_transcount.csv")
gpu = gpu.groupby("year").mean()

# Join the two tables on the year index, keeping years present in either one.
df = pd.merge(df, gpu, how='outer', left_index=True, right_index=True)
# Replace NaN (years missing from one of the tables) with 0.
df = df.replace(np.nan, 0)
print(df)

years = df.index.values
counts = df['trans_count'].values
gpu_counts = df['gpu_trans_count'].values

# NOTE(review): any year whose trans_count was filled with 0 above becomes
# -inf here (NumPy emits a RuntimeWarning) -- confirm against the data.
cnt_log = np.log(counts)

plt.scatter(
    years,
    cnt_log,
    c=200 * years,                               # colour
    s=20 + 200 * gpu_counts / gpu_counts.max(),  # marker size
    alpha=0.5,                                   # transparency
)
plt.show()

 

运行结果如下:

 

4 图例和注解

# Scatter plot with a fitted line, a legend, an annotation, a grid, axis
# labels and a title.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load both data files and average the rows of each year.
# .mean() replaces .aggregate(np.mean): same result, no FutureWarning on
# recent pandas versions.
df = pd.read_csv("transcount.csv")
df = df.groupby('year').mean()
gpu = pd.read_csv('gpu_transcount.csv')
gpu = gpu.groupby('year').mean()

# Join the two tables on the year index, keeping years present in either one.
df = pd.merge(df, gpu, how='outer', left_index=True, right_index=True)
# Replace NaN with 0.
df = df.replace(np.nan, 0)

# Extract the columns as NumPy arrays.
years = df.index.values
counts = df['trans_count'].values
gpu_counts = df['gpu_trans_count'].values

# Fit a first-degree polynomial to log(counts) and draw the fitted line.
# NOTE(review): counts filled with 0 above turn into -inf under np.log, which
# would break the fit -- confirm the data has no such years.
poly = np.polyfit(years, np.log(counts), deg=1)
plt.plot(years, np.polyval(poly, years), label='Fit')

# Annotation target: the first year that has GPU data.
gpu_start = gpu.index.values.min()
y_ann = np.log(df.at[gpu_start, 'trans_count'])
ann_str = 'First GPU\n %d' % gpu_start

# Draw the annotation: an arrow pointing at (gpu_start, y_ann), with the text
# offset from that point by (-30, +70) points.
plt.annotate(
    ann_str,
    xy=(gpu_start, y_ann),
    arrowprops=dict(arrowstyle='->'),
    xytext=(-30, +70),
    textcoords="offset points",
)

cnt_log = np.log(counts)

# Scatter plot; marker size is driven by the GPU transistor count.
plt.scatter(
    years,
    cnt_log,
    c=200 * years,
    s=20 + 200 * gpu_counts / gpu_counts.max(),
    alpha=0.5,
    label="Scatter Plot",
)
plt.legend(loc="upper left")

# Enable the grid.
plt.grid()

# Axis labels.
plt.xlabel('Year')
plt.ylabel(
    "Log Transistor Counts",
    fontsize=16,  # font size
)

# Title.
plt.title("Moore's Law & Transistor Counts")
plt.show()

 

运行结果如下:

 

5 三维图

# 3D surface plot: year (x) vs. log CPU transistor count (y) vs. log GPU
# transistor count (z).
# Importing Axes3D registers the "3d" projection on older Matplotlib versions.
from mpl_toolkits.mplot3d.axes3d import Axes3D  # noqa: F401
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load both data files and average the rows of each year.
# .mean() replaces .aggregate(np.mean): same result, no FutureWarning on
# recent pandas versions.
df = pd.read_csv("transcount.csv")
df = df.groupby('year').mean()
gpu = pd.read_csv("gpu_transcount.csv")
gpu = gpu.groupby('year').mean()

# Join the two data sets on the year index and replace NaN with 0.
df = pd.merge(df, gpu, how='outer', left_index=True, right_index=True)
df = df.replace(np.nan, 0)

# The figure is the container that holds the plot. Create the 3D axes through
# the figure: Axes3D(fig) no longer attaches itself to the figure on current
# Matplotlib versions, which leaves an empty window.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

X = df.index.values
Y = np.log(df['trans_count'].values)

# Build the coordinate matrices.
X, Y = np.meshgrid(X, Y)

# plot_surface requires a 2-D Z. Broadcasting the 1-D values across the rows
# makes Z vary with the year (the X direction), matching what older
# Matplotlib versions did implicitly.
# NOTE(review): counts filled with 0 above become -inf under np.log -- confirm
# how the data should be treated for years without a value.
Z = np.broadcast_to(np.log(df['gpu_trans_count'].values), X.shape)

# Draw the surface.
ax.plot_surface(X, Y, Z)

# Axis labels.
ax.set_xlabel('Year')
ax.set_ylabel('Log CPU transistor counts')
ax.set_zlabel('Log GPU transistor counts')

# Title.
ax.set_title("Moore's Law & Transistor Counts")
plt.show()

 

运行结果如下:

 

 

6 pandas绘图

示例代码如下:

# Plotting directly from pandas: linear plot, semi-log plot and a log-log
# scatter plot.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load both data files and average the rows of each year.
# .mean() replaces .aggregate(np.mean): same result, no FutureWarning on
# recent pandas versions.
df = pd.read_csv("transcount.csv")
df = df.groupby('year').mean()
gpu = pd.read_csv("gpu_transcount.csv")
gpu = gpu.groupby('year').mean()

# Join the two data sets on the year index and replace NaN with 0.
df = pd.merge(df, gpu, how='outer', left_index=True, right_index=True)
df = df.replace(np.nan, 0)

# Plain line plot of every column.
df.plot()

# Semi-log plot (logarithmic y axis).
df.plot(logy=True)

# Scatter plot restricted to the rows that have a GPU count.
df[df['gpu_trans_count'] > 0].plot(
    kind='scatter',       # plot type
    x='trans_count',
    y='gpu_trans_count',
    loglog=True,          # logarithmic scale on both axes
)
plt.show()

 

运行结果如下:



 

 

7 时滞图

# Lag plot of the log CPU transistor counts.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# pandas.tools.plotting has been removed from pandas; the plotting helpers
# live in pandas.plotting.
from pandas.plotting import lag_plot

# Load both data files and average the rows of each year.
# .mean() replaces .aggregate(np.mean): same result, no FutureWarning on
# recent pandas versions.
df = pd.read_csv("transcount.csv")
df = df.groupby('year').mean()
gpu = pd.read_csv("gpu_transcount.csv")
gpu = gpu.groupby('year').mean()

# Join the two data sets on the year index and replace NaN with 0.
df = pd.merge(df, gpu, how='outer', left_index=True, right_index=True)
df = df.replace(np.nan, 0)

# NOTE(review): years whose trans_count was filled with 0 above become -inf
# under np.log -- confirm against the data.
lag_plot(np.log(df['trans_count']))
plt.show()

 

 

 

8 自相关图

# Autocorrelation plot of the log CPU transistor counts.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# pandas.tools.plotting has been removed from pandas; the plotting helpers
# live in pandas.plotting.
from pandas.plotting import autocorrelation_plot

# Load both data files and average the rows of each year.
# .mean() replaces .aggregate(np.mean): same result, no FutureWarning on
# recent pandas versions.
df = pd.read_csv("transcount.csv")
df = df.groupby('year').mean()
gpu = pd.read_csv("gpu_transcount.csv")
gpu = gpu.groupby('year').mean()

# Join the two data sets on the year index and replace NaN with 0.
df = pd.merge(df, gpu, how='outer', left_index=True, right_index=True)
df = df.replace(np.nan, 0)

# NOTE(review): years whose trans_count was filled with 0 above become -inf
# under np.log -- confirm against the data.
autocorrelation_plot(np.log(df['trans_count']))
plt.show()

 

 

9 plot.ly

(本节代码尚未测试。)

0 0
原创粉丝点击