Python中关于CSV文件的I/O

来源:互联网 发布:python 字节流 编辑:程序博客网 时间:2024/06/10 12:41
在用pandas包进行数据处理的时候,会遇到一些文件的读取与写入。比如csv文件,是一种比较常见的文件,它以逗号作为分隔符。所以,这里总结一下运用pandas数据处理包处理csv文件的读取与写入方式。

# Reading data
import pandas as pd

# The file has a header row.
df = pd.read_table("ex1.csv", sep=",")
# print(df)

# The file has no header row: let pandas number the columns ...
df1 = pd.read_csv("ex2.csv", header=None)
# print(df1)
# ... or supply the column names explicitly.
df11 = pd.read_csv("ex2.csv", names=["a", "b", "c", "d", "message"])
# print(df11)

# Use the "message" column as the index.
df111 = pd.read_csv("ex2.csv", names=["a", "b", "c", "d", "message"],
                    index_col="message")
# print(df111)
# Select a single value
# print(df111.loc["hello", "a"])
# Select a row
# print(df111.loc["hello"])
# Select a column
# print(df111["a"])

# Read into a hierarchical (multi-level) index.
parsed = pd.read_csv("ex3.csv", index_col=["key1", "key2"])
# print(parsed)

# Handle a file whose fields are separated by a variable amount of
# whitespace. The raw lines are read inside a `with` block so the file
# handle is closed as soon as they have been collected.
with open("ex3.txt") as _raw:
    l = list(_raw)
# print(l)
# Raw string: "\s" is not a valid escape in a normal string literal.
result = pd.read_table("ex3.txt", sep=r"\s+")
# print(result)

# Handle irregular file layouts.
# skiprows skips the given (0-based) line numbers of the file.
df2 = pd.read_csv("ex4.csv", skiprows=[0, 2, 3], nrows=2)
# print(df2)

# Handle missing values.
df3 = pd.read_csv("ex5.csv")
# print(df3)
# print(pd.isnull(df3))
# na_values accepts a collection of strings that denote missing values.
result1 = pd.read_csv("ex5.csv", na_values=["NULL"])
# print(result1)
# A dict gives per-column sentinels; the keys must match the column names
# of ex5.csv exactly ("something", not "somethig").
sentinels = {"message": ["foo", "NA"], "something": ["two"]}
result2 = pd.read_csv("ex5.csv", na_values=sentinels)
# print(result2)
# Writing data

import numpy as np
import pandas as pd
from pandas import Series

data = pd.read_csv("ex5.csv")
# print(data)
# data.to_csv("out.csv")
# Suppress both the row labels (index) and the column labels (header).
# data.to_csv("out.csv", index=False, header=False)
# A subset of the columns can be written as well.
# data.to_csv("out1.csv", index=False, columns=['a', 'b', 'c'])
datas = pd.date_range('1/1/2000', periods=7)
# print(datas)
ts = Series(np.arange(7), index=datas)
# header=False keeps out2.csv header-less, which is the layout the read
# below expects. NOTE(review): out2.csv must already exist for the read to
# succeed -- run this line once if it does not.
# ts.to_csv("out2.csv", header=False)
# Read the Series back. Series.from_csv was deprecated in pandas 0.21 and
# removed in 1.0; this is the equivalent read_csv call: first column as a
# date-parsed index, no header row, first data column taken as the Series.
read = pd.read_csv("out2.csv", header=None, index_col=0,
                   parse_dates=True).iloc[:, 0]
# Drop the positional names read_csv assigns when there is no header.
read.index.name = read.name = None
# print(read)
# Next, process a CSV file by hand with the standard-library csv module.

import csv

# Reading: open the file in a `with` block so the handle is closed again
# before ex7.csv is reopened further down.
with open("ex7.csv") as f:
    read = csv.reader(f)
    # for line in read:
    #     print(line)

# Reshape the rows into a header -> column-values mapping for nicer output.
with open("ex7.csv") as f:
    lines = list(csv.reader(f))
header, values = lines[0], lines[1:]
# zip(*values) transposes rows into columns; zip stops at the shortest
# input, so surplus fields in a longer row are dropped.
data_dict = dict(zip(header, zip(*values)))
print(data_dict)

# Writing
# First define a custom format (dialect) for the CSV file.
class my_dialect(csv.Dialect):
    """Semicolon-delimited CSV dialect with "\\n" line endings."""

    lineterminator = "\n"
    delimiter = ";"
    # Must be exactly one character; the csv module rejects longer strings.
    quotechar = '"'
    # csv.Dialect leaves `quoting` as None, which csv.writer rejects, so it
    # has to be set explicitly. QUOTE_MINIMAL quotes only fields that need it.
    quoting = csv.QUOTE_MINIMAL

# Write three rows using the custom dialect. This overwrites ex7.csv, the
# same file that was read earlier in the script.
# NOTE(review): on Python 3 the csv docs recommend open(..., newline="");
# it is omitted here because Python 2's built-in open() does not accept it.
with open("ex7.csv", "w") as f:
    writer = csv.writer(f, dialect=my_dialect)
    writer.writerow(('one', 'two', 'three'))
    writer.writerow(('1', '2', '3'))
    writer.writerow(('4', '5', '6'))
 

引用的csv文件如下:

ex1.csv

a,b,c,d,message
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo

ex2.csv

1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo

ex3.csv

key1,key2,value1,value2
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16

ex3.txt

      A  B  C
aaa 1 5 9
bbb 2 6 10
ccc 3 7 11
ddd 4 8 12

ex4.csv

# hey!
a,b,c,d,message
# just wanted to make things more difficult for you
# who reads CSV files with computers, anyway?
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo

ex5.csv

something,a,b,c,d,message
one,1,2,3,4,NA
two,5,6,,8,world
three,9,10,11,12,foo

ex7.csv

"a","b","c"
"1","2","3"
"1","2","3","4"