【Pandas-Cookbook】03:噪音数据处理

来源:互联网 发布:sql批量修改字段值 编辑:程序博客网 时间:2024/04/30 18:16
# -*-coding:utf-8-*-
# ---------------------
# Chapter 3 - Which borough has the most noise complaints (or, more selecting data).ipynb
# ---------------------
#
# Walk-through script: loads the 311 service-request data set, selects the
# "Noise - Street/Sidewalk" complaints with boolean masks, and plots the share
# of such complaints per borough.
#
# NOTE: every print below uses the single-argument call form `print(x)`,
# which produces the same output under the Python 2 print statement and the
# Python 3 print function.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

complaints = pd.read_csv('../data/311-service-requests.csv')  # read the CSV file
print(complaints.head())
print(complaints[:5])

# ---------------------------------------------------------------------------
# 3.1 Selecting only noise complaints
# ---------------------------------------------------------------------------
# Comparing a column with a scalar yields a boolean (True/False) Series that
# can be used as a row mask.
is_noise = complaints['Complaint Type'] == 'Noise - Street/Sidewalk'
noise_complaints = complaints[is_noise]
print(noise_complaints[:3])
print(is_noise)

# Masks combine element-wise with `&`; filter once and reuse the result.
in_brooklyn = complaints['Borough'] == 'BROOKLYN'
brooklyn_noise = complaints[is_noise & in_brooklyn]
print(brooklyn_noise[:5])
print(brooklyn_noise[['Complaint Type', 'Borough', 'Created Date', 'Descriptor']][:10])

# ---------------------------------------------------------------------------
# 3.2 A digression about numpy arrays
# ---------------------------------------------------------------------------
pf = pd.Series([1, 2, 3])
print(pf)
print(pf.values)
print(pf.index)

# The same boolean-mask selection works directly on a numpy array.
nf = np.array([1, 2, 3])
print(nf)
print(nf != 2)
print(nf[nf != 2])

# ---------------------------------------------------------------------------
# 3.3 So, which borough has the most noise complaints?
# ---------------------------------------------------------------------------
# `is_noise` and `noise_complaints` from section 3.1 are reused here; they were
# built from the same expression and `complaints` has not changed since.
noise_complaint_counts = noise_complaints['Borough'].value_counts()
print(noise_complaint_counts)
complaint_counts = complaints['Borough'].value_counts()

# Plain division first: under Python 2 integer-division semantics this may
# print truncated values, which is why the float conversion below exists.
print(noise_complaint_counts / complaint_counts)

# Force float division so the per-borough ratio is meaningful on any version.
noise_share = noise_complaint_counts / complaint_counts.astype(float)
print(noise_share)
noise_share.plot(kind='bar')
plt.show()
0 0
原创粉丝点击