Python-Pandas(5)核心数据结构Series详解

来源:互联网 发布:dva矩阵 编辑:程序博客网 时间:2024/05/22 14:39
# pandas core data structures:
#   Series    - a collection of values
#   DataFrame - a collection of Series objects
#   Panel     - a collection of DataFrame objects
#
# A Series object can hold many data types, including:
#   float               - float values
#   int                 - integer values
#   bool                - Boolean values
#   datetime64[ns]      - date & time, without time-zone
#   datetime64[ns, tz]  - date & time, with time-zone
#   timedelta[ns]       - differences in dates & times (seconds, minutes, etc.)
#   category            - categorical values
#   object              - String values
#
# Columns of the data set used below:
#   FILM                - film name
#   RottenTomatoes      - Rotten Tomatoes critics average score
#   RottenTomatoes_User - Rotten Tomatoes user average score
#   RT_norm             - Rotten Tomatoes critics average score
#                         (normalized to a 0 to 5 point system)
#   RT_user_norm        - Rotten Tomatoes user average score
#                         (normalized to a 0 to 5 point system)
#   Metacritic          - Metacritic critics average score
#   Metacritic_User     - Metacritic user average score
import pandas as pd

fandango = pd.read_csv('fandango_score_comparison.csv')

# Selecting a single column of a DataFrame yields a Series.
series_film = fandango['FILM']
series_rt = fandango['RottenTomatoes']

# Preview the first five entries of each column.
print(series_film.head(5))
print(series_rt.head(5))

这里写图片描述

# Build a Series of critic scores that is labelled by film title rather
# than by row number.
from pandas import Series

# .values exposes the underlying arrays of the two columns.
# (To inspect them: print(type(film_names)), print(film_names), print(rt_scores))
film_names = series_film.values
rt_scores = series_rt.values

series_custom = Series(data=rt_scores, index=film_names)

# Look up several films at once by passing a list of labels.
series_custom.loc[['Minions (2015)', 'Leviathan (2014)']]

这里写图片描述

# Integer positions are still available on a label-indexed Series.
series_custom = Series(data=rt_scores, index=film_names)

# Selection by label ...
series_custom.loc[['Minions (2015)', 'Leviathan (2014)']]

# ... and selection by position: entries 5 through 9.
fiveten = series_custom.iloc[5:10]
print(fiveten)

这里写图片描述

# Reorder the Series so that its film-title labels are in sorted order.
original_index = series_custom.index.tolist()

# Sort a copy so that original_index keeps the original ordering.
sorted_index = list(original_index)
sorted_index.sort()

# reindex() returns a new Series arranged according to the given labels.
sorted_by_index = series_custom.reindex(sorted_index)
# (To inspect: print(original_index), print(sorted_by_index))

这里写图片描述

# A Series can be ordered either by its labels or by its values;
# both methods return a new Series.
sc2 = series_custom.sort_index()
sc3 = series_custom.sort_values()

# Show the first ten entries of the value-sorted Series.
# (For the label-sorted one: print(sc2[0:10]))
print(sc3.iloc[:10])

这里写图片描述

# The values in a Series object are treated as an ndarray, the core data
# type in NumPy, so NumPy functions can be applied to a Series directly.
import numpy as np

# Add each value to its counterpart (element-wise sum of the Series with
# itself). print() is called as a function with a single argument so the
# line is valid on Python 3 and still behaves the same on Python 2.
print(np.add(series_custom, series_custom))

# Apply the sine function to each value.
np.sin(series_custom)

# Return the highest value (a single value, not a Series).
np.max(series_custom)

这里写图片描述

# Comparing a Series with a scalar returns a Series holding one boolean
# value per film.
series_custom > 50

# Boolean masks for the two score criteria.
criteria_one = series_custom > 50
criteria_two = series_custom < 75

# Indexing with a boolean mask keeps only the entries where it is True.
series_greater_than_50 = series_custom[criteria_one]

# Combine the masks element-wise with & to keep scores strictly between
# 50 and 75.
both_criteria = series_custom[criteria_one & criteria_two]

# print() is called as a function with a single argument so the line is
# valid on Python 3 and still behaves the same on Python 2.
print(both_criteria)

这里写图片描述

# Data alignment: arithmetic between two Series matches entries by index
# label. Both Series below share the same film-title index.
film_index = fandango['FILM']
critic_values = fandango['RottenTomatoes'].values
user_values = fandango['RottenTomatoes_User'].values

rt_critics = Series(critic_values, index=film_index)
rt_users = Series(user_values, index=film_index)

# Per-film mean of the critic score and the user score.
rt_total = rt_critics + rt_users
rt_mean = rt_total / 2
print(rt_mean)

这里写图片描述

原创粉丝点击