Python3.5——Pandas模块使用(中)——DataFrame

来源:互联网 发布:pdf解密软件在线 编辑:程序博客网 时间:2024/06/05 10:37

1、DataFrame的创建

(1)通过二维数组方式创建


#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:ZhengzhengLiu
"""Create DataFrames from two-dimensional data and inspect their parts."""

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# 1. Create a DataFrame from two-dimensional data.
# A nested list is accepted directly; row and column labels default to 0..n-1.
print("======DataFrame直接通过二维数组创建======")
d1 = DataFrame([["a","b","c","d"],[1,2,3,4]])
print(d1)

# A NumPy 2-D array works too. Mixing str and int in np.array() yields an
# all-string array, which is why the scores print as '78', '86', ... below.
print("======DataFrame借助array二维数组创建======")
arr = np.array([
    ["jack",78],
    ["lili",86],
    ["amy",97],
    ["tom",100]
])
# Explicit row labels (index) and column labels (columns).
d2 = DataFrame(arr,index=["01","02","03","04"],columns=["姓名","成绩"])
print(d2)

# The three building blocks of a DataFrame: row index, column index, values.
print("========打印行索引========")
print(d2.index)
print("========打印列索引========")
print(d2.columns)
print("========打印值========")
print(d2.values)

# Output recorded by the original author:
#
# ======DataFrame直接通过二维数组创建======
#    0  1  2  3
# 0  a  b  c  d
# 1  1  2  3  4
# ======DataFrame借助array二维数组创建======
#       姓名   成绩
# 01  jack   78
# 02  lili   86
# 03   amy   97
# 04   tom  100
# ========打印行索引========
# Index(['01', '02', '03', '04'], dtype='object')
# ========打印列索引========
# Index(['姓名', '成绩'], dtype='object')
# ========打印值========
# [['jack' '78']
#  ['lili' '86']
#  ['amy' '97']
#  ['tom' '100']]

(2)通过字典方式创建


# 2. Create a DataFrame from a dict: each key becomes a column label, each
# value becomes that column's data, and the row index is generated
# automatically. A scalar value ("month": 8) is repeated for every row.
data = {
    "apart":['1101',"1102","1103","1104"],
    "profit":[2000,4000,5000,3500],
    "month":8
}
d3 = DataFrame(data)
print(d3)
print("========行索引========")
print(d3.index)
print("========列索引========")
print(d3.columns)
print("========数据值========")
print(d3.values)

# Output recorded by the original author (Python 3.5). The columns appear
# sorted by name there; NOTE: newer pandas versions keep the dict's insertion
# order instead, so the columns print as apart, profit, month.
#
#   apart  month  profit
# 0  1101      8    2000
# 1  1102      8    4000
# 2  1103      8    5000
# 3  1104      8    3500
# ========行索引========
# RangeIndex(start=0, stop=4, step=1)
# ========列索引========
# Index(['apart', 'month', 'profit'], dtype='object')
# ========数据值========
# [['1101' 8 2000]
#  ['1102' 8 4000]
#  ['1103' 8 5000]
#  ['1104' 8 3500]]

2、DataFrame数据获取




import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# 3. Reading and modifying data in a DataFrame.
data = {
    "apart":['1101',"1102","1103","1104"],
    "profit":[2000,4000,5000,3500],
    "month":8
}
d3 = DataFrame(data)
print(d3)

print("======获取一列数据======")
print(d3["apart"])          # df[col] selects one column as a Series

print("======获取一行数据======")
# The original article used d3.ix[1]; .ix was removed in pandas 1.0.
# On this integer index .ix selected by label, so .loc is the equivalent.
print(d3.loc[1])

print("======修改数据值======")
d3["month"] = [7,8,9,10]                # overwrite an existing column
d3["year"] = [2001,2001,2003,2004]      # add a new column
# Assigning to a row label that does not exist appends a new row; every cell
# is NaN, which forces the numeric columns to float (see the output below).
# np.nan replaces the np.NaN alias, which NumPy 2.0 removed.
d3.loc["4"] = np.nan
print(d3)

# Output recorded by the original author (Python 3.5). NOTE: newer pandas
# versions keep the dict's insertion order, so the column order may differ.
#
#   apart  month  profit
# 0  1101      8    2000
# 1  1102      8    4000
# 2  1103      8    5000
# 3  1104      8    3500
# ======获取一列数据======
# 0    1101
# 1    1102
# 2    1103
# 3    1104
# Name: apart, dtype: object
# ======获取一行数据======
# apart     1102
# month        8
# profit    4000
# Name: 1, dtype: object
# ======修改数据值======
#   apart  month  profit    year
# 0  1101    7.0  2000.0  2001.0
# 1  1102    8.0  4000.0  2001.0
# 2  1103    9.0  5000.0  2003.0
# 3  1104   10.0  3500.0  2004.0
# 4   NaN    NaN     NaN     NaN

3、pandas基本功能


(1)pandas数据文件读取



import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Basic pandas operations
# 1. Read a data file: the first line of data.csv supplies the column labels
#    and the row index is generated automatically (see the output below).
df = pd.read_csv("data.csv")
print(df)

# Output recorded by the original author:
#
#     name  age  source
# 0  gerry   18    98.5
# 1    tom   21    78.2
# 2   lili   24    98.5
# 3   john   20    89.2


(2)数据过滤获取


import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Basic pandas operations
# 1. Read a data file.
df = pd.read_csv("data.csv")
print(df)

# 2. Filtering and selecting data.
columns = ["姓名","年龄","成绩"]
df.columns = columns        # replace the column labels
print("=======更改列索引========")
print(df)

# Select several columns by passing a list of labels.
df1 = df[columns[1:]]
print("=======获取几列的值========")
print(df1)

print("=======获取几行的值========")
# The original article used df.ix[1:3]; .ix was removed in pandas 1.0.
# .loc slices by label and includes both ends, so rows 1, 2 and 3 are
# returned, matching the recorded output.
print(df.loc[1:3])

# Drop every row that contains a NaN value (returns a new DataFrame).
df2 = df1.dropna()
print("=======删除含有NaN值的行=======")
print(df2)

# Output recorded by the original author:
#
#     name  age  source
# 0  gerry   18    98.5
# 1    tom   21     NaN
# 2   lili   24    98.5
# 3   john   20    89.2
# =======更改列索引========
#       姓名  年龄    成绩
# 0  gerry  18  98.5
# 1    tom  21   NaN
# 2   lili  24  98.5
# 3   john  20  89.2
# =======获取几列的值========
#    年龄    成绩
# 0  18  98.5
# 1  21   NaN
# 2  24  98.5
# 3  20  89.2
# =======获取几行的值========
#      姓名  年龄    成绩
# 1   tom  21   NaN
# 2  lili  24  98.5
# 3  john  20  89.2
# =======删除含有NaN值的行=======
#    年龄    成绩
# 0  18  98.5
# 2  24  98.5
# 3  20  89.2