《利用Python进行数据分析》笔记---第8章绘图和可视化
来源:互联网 发布:中小型企业网络架构 编辑:程序博客网 时间:2024/04/29 16:35
写在前面的话:
实例中的所有数据都是在GitHub上下载的,打包下载即可。
地址是:http://github.com/pydata/pydata-book
还有一定要说明的:
我使用的是 Python 2.7。书中的部分代码有错误,下面的代码均已在我自己的 Python 2.7 环境下调试通过。
# coding: utf-8
"""Plotting and visualization walkthrough (matplotlib, pandas, basemap).

The script is a sequence of independent plotting experiments followed by a
case study that maps categorised incident reports from ``Haiti.csv``.  It was
written against Python 2.7 and reads its input files from a local checkout of
the pydata-book repository (see the ``D:\\...\\ch08`` paths below).

Bare expressions such as ``axes`` or ``df`` are leftovers from interactive
use; they have no effect when the file is run as a script.
"""
from datetime import datetime

from pandas import Series, DataFrame
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from numpy.random import randn

# --- Figures and subplots ---------------------------------------------------
fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
# plt.plot draws on the current axes, i.e. the one created last (ax3).
plt.plot(randn(50).cumsum(), 'k--')
_ = ax1.hist(randn(100), bins=20, color='k', alpha=0.3)
ax2.scatter(np.arange(30), np.arange(30) + 3 * randn(30))

fig, axes = plt.subplots(2, 3)
axes

# 2x2 grid of histograms sharing both axes, with no spacing between panels.
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        axes[i, j].hist(randn(500), bins=50, color='k', alpha=0.5)
plt.subplots_adjust(wspace=0, hspace=0)

# --- Line styles, markers and legends ---------------------------------------
plt.plot(randn(30).cumsum(), 'ko--')
data = randn(30).cumsum()
plt.plot(data, 'k--', label='Default')
plt.plot(data, 'k--', drawstyle='steps-post', label='steps-post')
plt.legend(loc='best')

# --- Ticks, tick labels and titles ------------------------------------------
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum())
ticks = ax.set_xticks([0, 250, 500, 750, 1000])
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],
                            rotation=30, fontsize='small')
ax.set_title('aadadada dsadad sdad')

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(randn(1000).cumsum(), 'k', label='one')
ax.plot(randn(1000).cumsum(), 'k--', label='two')
ax.plot(randn(1000).cumsum(), 'k.', label='three')
ax.legend(loc='best')
plt.show()

# --- Annotations on a time series -------------------------------------------
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# Raw strings keep the Windows backslashes literal without relying on
# unrecognised escape sequences being passed through.
data = pd.read_csv(r'D:\Source Code\pydata-book-master\ch08\spx.csv',
                   index_col=0, parse_dates=True)
spx = data['SPX']
spx.plot(ax=ax, style='k-')
crisis_data = [
    (datetime(2007, 10, 11), 'Peak of bull market'),
    (datetime(2008, 3, 12), 'Bear Stearns Fails'),
    (datetime(2008, 9, 15), 'Lehman Bankruptcy'),
]
for date, label in crisis_data:
    # Arrow tip sits 50 above the series value at `date`; the text 200 above.
    ax.annotate(label,
                xy=(date, spx.asof(date) + 50),
                xytext=(date, spx.asof(date) + 200),
                arrowprops=dict(facecolor='black'),
                horizontalalignment='left',
                verticalalignment='top')
ax.set_xlim(['1/1/2007', '1/1/2011'])
ax.set_ylim([600, 1800])
ax.set_title('Important dates in 2008-2009 finacial crisis')
plt.show()

# --- Drawing shapes (patches) -----------------------------------------------
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)
circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)
pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],
                   color='g', alpha=0.5)
ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)
plt.show()

# --- pandas plotting: line plots --------------------------------------------
s = Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
s.plot(use_index=False)
df = DataFrame(np.random.randn(10, 4).cumsum(0),
               columns=['A', 'B', 'C', 'D'],
               index=np.arange(0, 100, 10))
df.plot()

# --- pandas plotting: bar plots ---------------------------------------------
fig, axes = plt.subplots(2, 1)
data = Series(np.random.randn(16), index=list('abcdefghijklmnop'))
data.plot(kind='barh', ax=axes[0], color='k', alpha=0.7)
data.plot(kind='bar', ax=axes[1], color='k', alpha=0.7)

df = DataFrame(np.random.randn(6, 4),
               index=['one', 'two', 'three', 'four', 'five', 'six'],
               columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
df
df.plot(kind='bar')
df.plot(kind='bar', stacked=True, alpha=0.5)

tips = pd.read_csv(r'D:\Source Code\pydata-book-master\ch08\tips.csv')
# Index the column with ['size']: attribute access (tips.size) resolves to
# the DataFrame's own `size` attribute wherever pandas defines one, not to
# the CSV column of that name.
party_counts = pd.crosstab(tips.day, tips['size'])
party_counts
# Keep only the columns labelled 2 through 5.
# NOTE(review): .ix is deprecated in newer pandas (removed in 1.0); kept
# because this script targets the Python 2.7-era API.
party_counts = party_counts.ix[:, 2:5]
# Divide each row by its row total so the rows sum to 1.
party_pcts = party_counts.div(party_counts.sum(1).astype(float), axis=0)
party_pcts
party_pcts.plot(kind='bar', stacked=True)

# --- Histograms and density plots -------------------------------------------
tips['tip_pct'] = tips['tip'] / tips['total_bill']
tips['tip_pct'].hist(bins=50)
tips['tip_pct'].plot(kind='kde')

comp1 = np.random.normal(0, 1, size=200)
comp2 = np.random.normal(10, 2, size=200)
values = Series(np.concatenate([comp1, comp2]))
values
# NOTE(review): newer matplotlib replaced `normed` with `density`.
values.hist(bins=100, alpha=0.3, color='k', normed=True)
values.plot(kind='kde', style='k--')

# --- Scatter plots ----------------------------------------------------------
macro = pd.read_csv(r'D:\Source Code\pydata-book-master\ch08\macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
# Log-differences of each column; the first row (NaN after diff) is dropped.
trans_data = np.log(data).diff().dropna()
trans_data[-5:]
plt.scatter(trans_data['m1'], trans_data['unemp'])
plt.title('Changes in log %s vs. log %s' % ('m1', 'unemp'))
# NOTE(review): newer pandas exposes this as pandas.plotting.scatter_matrix.
pd.scatter_matrix(trans_data, diagonal='kde', color='k', alpha=0.3)
pd.scatter_matrix(trans_data, diagonal='hist', color='k', alpha=0.3)

# --- Case study: Haiti incident report data ---------------------------------
data = pd.read_csv(r'D:\Source Code\pydata-book-master\ch08\Haiti.csv')
data
data[['INCIDENT DATE', 'LATITUDE', 'LONGITUDE']][:10]
data['CATEGORY'][:6]
data.describe()
# Keep rows inside the 18..20 / -75..-70 lat/lon box that have a category.
data = data[(data.LATITUDE > 18) & (data.LATITUDE < 20) &
            (data.LONGITUDE > -75) & (data.LONGITUDE < -70) &
            data.CATEGORY.notnull()]


def to_cat_list(catstr):
    """Split a comma-separated string into stripped, non-empty entries."""
    stripped = (x.strip() for x in catstr.split(','))
    return [x for x in stripped if x]


def get_all_categoties(cat_series):
    """Return the sorted union of every entry found in `cat_series`.

    Each element of `cat_series` is parsed with `to_cat_list`.
    """
    cat_sets = (set(to_cat_list(x)) for x in cat_series)
    return sorted(set.union(*cat_sets))


def get_english(cat):
    """Split a category of the form 'code.names' into (code, names).

    When `names` contains '|', only the piece after the first '|' is kept.
    The returned name is stripped of surrounding whitespace.
    """
    code, names = cat.split('.')
    if '|' in names:
        names = names.split('|')[1]
    return code, names.strip()


all_cats = get_all_categoties(data.CATEGORY)
english_mapping = dict(get_english(x) for x in all_cats)
english_mapping['2a']
english_mapping['6c']


def get_code(seq):
    """Return the text before the first '.' of every non-empty item."""
    return [x.split('.')[0] for x in seq if x]


all_codes = get_code(all_cats)
code_index = pd.Index(np.unique(all_codes))
# One zero-initialised indicator column per category code, aligned to `data`.
dummy_frame = DataFrame(np.zeros((len(data), len(code_index))),
                        index=data.index, columns=code_index)
dummy_frame.ix[:, :6]
for row, cat in zip(data.index, data.CATEGORY):
    codes = get_code(to_cat_list(cat))
    # Flag every code that appears in this row's category string.
    dummy_frame.ix[row, codes] = 1
data = data.join(dummy_frame.add_prefix('category_'))

# Imported here rather than at the top so the sections above still run when
# basemap is not installed.
from mpl_toolkits.basemap import Basemap


def basic_haiti_map(ax=None, lllat=17.25, urlat=20.25, lllon=-75, urlon=-71):
    """Create a stereographic Basemap for the given lat/lon bounding box.

    The projection is centred on the midpoint of the box.  Coastlines,
    states and countries are drawn before the map object is returned.

    Parameters: `ax` is the matplotlib axes to draw on (passed straight to
    Basemap); `lllat`/`lllon` and `urlat`/`urlon` are the lower-left and
    upper-right corners of the box.
    """
    m = Basemap(ax=ax, projection='stere',
                lon_0=(urlon + lllon) / 2,
                lat_0=(urlat + lllat) / 2,
                llcrnrlat=lllat, urcrnrlat=urlat,
                llcrnrlon=lllon, urcrnrlon=urlon,
                resolution='f')
    m.drawcoastlines()
    m.drawstates()
    m.drawcountries()
    return m


fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 10))
fig.subplots_adjust(hspace=0.05, wspace=0.05)
to_plot = ['2a', '1', '3c', '7a']
lllat = 17.25
urlat = 20.25
lllon = -75
urlon = -71
for code, ax in zip(to_plot, axes.flat):
    m = basic_haiti_map(ax, lllat=lllat, urlat=urlat,
                        lllon=lllon, urlon=urlon)
    cat_data = data[data['category_%s' % code] == 1]
    # Calling the map converts lon/lat into plot coordinates.
    x, y = m(cat_data.LONGITUDE, cat_data.LATITUDE)
    m.plot(x, y, 'k.', alpha=0.5)
    ax.set_title('%s:%s' % (code, english_mapping[code]))

# Runs after the loop, so `m` is the map of the last panel ('7a').
shapefile_path = r'D:\Source Code\pydata-book-master\ch08\PortAuPrince_Roads'
m.readshapefile(shapefile_path, 'roads')
阅读全文
1 0
- 《利用Python进行数据分析》笔记---第8章绘图和可视化
- 《利用Python进行数据分析》第8章 绘图和可视化笔记
- 绘图和可视化 《利用Python进行数据分析》第8章 读书笔记
- 利用Python进行数据分析--绘图和可视化
- 利用python进行数据分析-绘图和可视化1
- 利用python进行数据分析-绘图和可视化2
- 利用python进行数据分析(六):绘图和可视化
- [转载]利用Python进行数据分析——绘图和可视化 xticks-学习笔记
- 《利用Python进行数据分析》笔记---第11章金融和经济数据应用
- 【Python】 利用汽车数据进行可视化分析
- 《利用Python进行数据分析》笔记---第4章NumPy基础:数组和矢量计算
- 数据集合与分组运算 《利用python进行数据分析》笔记,第9章
- 《利用Python进行数据分析》笔记---第2章--MovieLens 1M数据集
- 《利用Python进行数据分析》笔记---第6章数据加载、存储与文件格式
- 《利用Python进行数据分析》笔记---第7章数据规整化:清理、转换、合并、重塑
- 《利用Python进行数据分析》笔记---第9章数据聚合与分组运算
- 利用Python对NBA SportUV数据进行可视化及分析
- 《利用Python进行数据分析》笔记---第2章--1880-2010年间全美婴儿姓名
- win10 配置TensorFlow环境
- 关于js函数 形参和局部变量名相同 的问题
- iOS开发 MFMailComposeViewController(发送邮件)的使用
- Oracle内置函数之数值型函数
- Myeclipse导入xml编写代码提示步骤(以struts2 的配置文件为例)
- 《利用Python进行数据分析》笔记---第8章绘图和可视化
- netty学习笔记(一)
- java 查询mongo模糊匹配
- Java基础巩固之梦-01.错失工作机遇之谈
- STM32 Boot模式设置方法
- [pyweb]django的get_absolute_url函数
- 小学奥数主要内容汇总!
- 幂等操作的实现方案
- epoll 示例代码