Numpy学习笔记——常用函数

来源:互联网 发布:相册制作软件 编辑:程序博客网 时间:2024/06/06 02:42

3.1 文件读写

3.2 动手实践:读写文件

savetxt():将数组保存到文件中

import numpy as np

# Build a 2x2 identity matrix, show it, and write it to a text file.
i2 = np.eye(2)
print(i2)
np.savetxt("eye.txt", i2)

3.3 CSV 文件

3.4 动手实践:读入CSV 文件

NumPy中的loadtxt函数可以方便地读取CSV文件,自动切分字段,并将数据载入NumPy数组。
usecols的参数为一个元组,以获取第7字段至第8字段的数据;
unpack参数设置为True,意思是分拆存储不同列的数据,即分别将收盘价和成交量的数组赋值给变量c和v;
调用average函数,并将v作为权重参数使用;

# Read the 7th and 8th CSV fields (closing price and volume) into two arrays.
c, v = np.loadtxt('data.csv', delimiter=',', usecols=(6, 7), unpack=True)

# Volume-weighted average price: closing prices weighted by volume.
vwap = np.average(c, weights=v)
print("VWAP =", vwap)
# The output is
# VWAP = 350.589549353

# Unweighted arithmetic mean, for comparison.
print("mean =", np.mean(c))
# mean = 351.037666667

# Time-weighted average price: the weight grows linearly with the row index,
# so the first row gets weight 0 and later rows count more.
t = np.arange(len(c))
print("twap =", np.average(c, weights=t))

3.8 动手实践:找到最大值和最小值

ptp函数可以计算数组的取值范围,即数组中最大值与最小值之差(peak to peak)。

# Read the 5th and 6th CSV fields into h and l (printed below as the
# highest / lowest prices).
h, l = np.loadtxt('data.csv', delimiter=',', usecols=(4, 5), unpack=True)
print("highest =", np.max(h))
print("lowest =", np.min(l))
# ptp ("peak to peak") returns max - min of the array.
print("Spread high price", np.ptp(h))
print("Spread low price", np.ptp(l))

3.9 统计分析

中位数函数、排序函数

# NOTE: `c` (the array of closing prices) must already be defined by the
# surrounding code.

# Median
print("median =", np.median(c))
# Sort; np.msort(c) was removed in NumPy 2.0 and np.sort(c, axis=0) is its
# documented equivalent.
sorted_close = np.sort(c, axis=0)
N = len(c)
# Variance
print("variance =", np.var(c))
# Natural logarithm (the result is not stored; shown for illustration only)
np.log(c)

NumPy中的diff函数可以返回一个由相邻数组元素的差值构成的数组。

# NOTE: `arr` must already be defined by the surrounding code
# (presumably the price array -- confirm against the caller).

# Stock returns: difference between adjacent elements divided by the earlier one.
returns = np.diff(arr) / arr[:-1]
# Standard deviation
print("Standard deviation =", np.std(returns))
# np.where returns the indices of the elements satisfying the condition
posretindices = np.where(returns > 0)

3.13 日期分析

将形如 28-01-2011(日-月-年)的日期字符串转换为对应的星期几(0 表示星期一,6 表示星期日)

def datestr2num(s):
    """Return the weekday number of a ``DD-MM-YYYY`` date string.

    Args:
        s: The date as ``str`` or ``bytes``. Depending on the NumPy version
            and the ``encoding`` argument, ``np.loadtxt`` may hand converters
            raw bytes, so bytes are decoded first.

    Returns:
        ``date.weekday()`` of the parsed date: 0 for Monday through 6 for
        Sunday.

    Raises:
        ValueError: If ``s`` does not match the ``%d-%m-%Y`` format.
    """
    # Imported locally so the snippet does not depend on a file-level import.
    import datetime

    if isinstance(s, bytes):
        s = s.decode("ascii")
    return datetime.datetime.strptime(s, "%d-%m-%Y").date().weekday()


# `converters` maps column 1 (the date string) through datestr2num, so the
# column is loaded as a number.
dates, close = np.loadtxt('data.csv', delimiter=',', usecols=(1, 6),
                          converters={1: datestr2num}, unpack=True)
# Create an array of 5 elements
averages = np.zeros(5)

take函数可以按照这些索引值从数组中取出相应的元素

# NOTE: `dates`, `close` and `averages` must already be defined by the
# surrounding code; `dates` is assumed to hold weekday numbers 0..4.
for i in range(5):
    # Positions whose weekday number equals i, and the prices at those positions.
    indices = np.where(dates == i)
    prices = np.take(close, indices)
    avg = np.mean(prices)
    print("Day", i, "prices", prices, "Average", avg)
    averages[i] = avg

top = np.max(averages)
print("Highest average", top)
# Index of the maximum value
print("Top day of the week", np.argmax(averages))

bottom = np.min(averages)
print("Lowest average", bottom)
# Index of the minimum value
print("Bottom day of the week", np.argmin(averages))
def datestr2num(s):
    """Return the weekday number (Monday == 0) of a ``DD-MM-YYYY`` date string.

    Accepts ``str`` or ``bytes``, because ``np.loadtxt`` may pass either to a
    converter depending on the NumPy version and ``encoding`` argument.
    """
    # Imported locally so the snippet does not depend on a file-level import.
    from datetime import datetime

    if isinstance(s, bytes):
        s = s.decode("ascii")
    return datetime.strptime(s, "%d-%m-%Y").date().weekday()


# `open_` rather than `open`, to avoid shadowing the builtin.
dates, open_, high, low, close = np.loadtxt(
    'data.csv', delimiter=',', usecols=(1, 3, 4, 5, 6),
    converters={1: datestr2num}, unpack=True)
# Only the first 16 rows are used.
close = close[:16]
dates = dates[:16]

# get first Monday
first_monday = np.ravel(np.where(dates == 0))[0]
print("The first Monday index is", first_monday)

# get last Friday
last_friday = np.ravel(np.where(dates == 4))[-1]
print("The last Friday index is", last_friday)

weeks_indices = np.arange(first_monday, last_friday + 1)
print("Weeks indices initial", weeks_indices)

# np.split raises ValueError unless the index range divides evenly into 3
# parts; this assumes the data covers three equally long weeks.
weeks_indices = np.split(weeks_indices, 3)
print("Weeks indices after split", weeks_indices)


def summarize(a, o, h, l, c):
    """Summarize one week of prices.

    Args:
        a: Row indices belonging to the week, in order.
        o, h, l, c: Arrays indexed by those rows (passed below as the open,
            high, low and close columns).

    Returns:
        Tuple ``("APPL", first-row open, max of h, min of l, last-row close)``.
    """
    monday_open = o[a[0]]
    week_high = np.max(np.take(h, a))
    week_low = np.min(np.take(l, a))
    friday_close = c[a[-1]]
    return ("APPL", monday_open, week_high, week_low, friday_close)


# Apply summarize to each row (axis 1) of the per-week index arrays.
weeksummary = np.apply_along_axis(summarize, 1, weeks_indices,
                                  open_, high, low, close)
print("Week summary", weeksummary)
np.savetxt("weeksummary.csv", weeksummary, delimiter=",", fmt="%s")

np.exp()函数

# Element-wise exponential e**x of 0..4.
x = np.arange(5)
print("Exp", np.exp(x))

np.linspace()函数:返回在-1到0之间均匀分布的5个点(包含两个端点)

# Five evenly spaced values from -1 to 0, both endpoints included.
print("Linspace", np.linspace(-1, 0, 5))

weights.sum():返回这个数组的和
weights/weights.sum():将weights中的每个元素除以所有元素之和,使归一化后的权重之和为1

# NOTE: `N` (the number of weights) must already be defined by the
# surrounding code.

# Exponentially growing weights: exp(-1) for the first element up to exp(0)
# for the last.
weights = np.exp(np.linspace(-1., 0., N))
# Normalize so that the weights sum to 1.
weights /= weights.sum()
print("Weights", weights)

convolve函数:

# NOTE: `weights`, `N`, `plot` and `show` must already be defined by the
# surrounding code (plot/show presumably from matplotlib.pyplot).
c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)

# np.convolve flips its first argument, so the weights are reversed here to
# make the largest weight (the last one) apply to the most recent price in
# each window. mode='valid' keeps only fully overlapping windows, which is
# the same as slicing the full result with [N-1:-N+1] but also works for
# N == 1.
ema = np.convolve(weights[::-1], c, mode='valid')

# ema[k] covers c[k:k+N], so it is plotted at index k + N - 1.
t = np.arange(N - 1, len(c))
plot(t, c[N-1:], lw=1.0)
plot(t, ema, lw=2.0)
show()

绘制布林带

import sys

import numpy as np
from matplotlib.pyplot import plot
from matplotlib.pyplot import show

# Window length, taken from the first command-line argument.
N = int(sys.argv[1])

# Equal weights -> simple moving average.
weights = np.ones(N) / N
print("Weights", weights)

c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)
C = len(c)  # was used below without ever being defined

# sma[k] is the mean of c[k:k+N]; mode='valid' equals slicing the full
# convolution with [N-1:-N+1] and also works for N == 1.
sma = np.convolve(weights, c, mode='valid')

# Standard deviation of each window around its own moving average.
# The window ending at index i is c[i-N+1 : i+1] and its mean is sma[i-N+1].
# (The original indexed sma[i-N-1], which wraps to negative indices for the
# first iterations, and paired it with a forward-looking window.)
deviation = []
for i in range(N - 1, C):
    window = c[i - N + 1: i + 1]
    dev = np.sqrt(np.mean((window - sma[i - N + 1]) ** 2))
    deviation.append(dev)

# The bands sit two standard deviations above and below the moving average.
deviation = 2 * np.array(deviation)
print(len(deviation), len(sma))
upperBB = sma + deviation
lowerBB = sma - deviation

# Prices aligned with sma / the bands.
c_slice = c[N-1:]
between_bands = np.where((c_slice < upperBB) & (c_slice > lowerBB))

print(lowerBB[between_bands])
# The indices are relative to c_slice, so index c_slice rather than c.
print(c_slice[between_bands])
print(upperBB[between_bands])
between_bands = len(np.ravel(between_bands))
print("Ratio between bands", float(between_bands)/len(c_slice))

t = np.arange(N - 1, C)
plot(t, c_slice, lw=1.0)
plot(t, sma, lw=2.0)
plot(t, upperBB, lw=3.0)
plot(t, lowerBB, lw=4.0)
show()

NumPy的linalg包专门用于线性代数计算。

import sys

import numpy as np

# Number of values/coefficients, taken from the first command-line argument.
N = int(sys.argv[1])

c = np.loadtxt('data.csv', delimiter=',', usecols=(6,), unpack=True)

# Right-hand side: the last N values, most recent first.
b = c[-N:]
b = b[::-1]
print("b", b)

A = np.zeros((N, N), float)
print("Zeros N by N", A)

# Row i holds the N values that end just before position -1 - i.
# This needs at least 2 * N values in c; otherwise the slice is too short
# and the assignment fails.
for i in range(N):
    A[i, ] = c[-N - 1 - i: - 1 - i]

print("A", A)

# Least-squares solution of A x = b.
(x, residuals, rank, s) = np.linalg.lstsq(A, b)

print(x, residuals, rank, s)

# Dot product of the fitted coefficients with the most recent N values.
print(np.dot(b, x))

3.28 动手实践:绘制趋势线

import numpy as np
from matplotlib.pyplot import plot
from matplotlib.pyplot import show


def fit_line(t, y):
    """Fit ``y ~ a * t + b`` by least squares.

    Args:
        t: 1-D array of x coordinates.
        y: 1-D array of values, same length as ``t``.

    Returns:
        The solution array ``[a, b]`` from ``np.linalg.lstsq``.
    """
    # Design matrix with columns [t, 1].
    A = np.vstack([t, np.ones_like(t)]).T
    return np.linalg.lstsq(A, y)[0]


h, l, c = np.loadtxt('data.csv', delimiter=',', usecols=(4, 5, 6), unpack=True)
pivots = (h + l + c) / 3
print("Pivots", pivots)

t = np.arange(len(c))
# Support line: fit through pivots minus the h - l spread.
sa, sb = fit_line(t, pivots - (h - l))
# Resistance line: fit through pivots plus the h - l spread.
ra, rb = fit_line(t, pivots + (h - l))

support = sa * t + sb
resistance = ra * t + rb
condition = (c > support) & (c < resistance)
print("Condition", condition)
between_bands = np.where(condition)
print(support[between_bands])
print(c[between_bands])
print(resistance[between_bands])
between_bands = len(np.ravel(between_bands))
print("Number points between bands", between_bands)
print("Ratio between bands", float(between_bands)/len(c))

# Extrapolate both lines one step past the last index.
print("Tomorrows support", sa * (t[-1] + 1) + sb)
print("Tomorrows resistance", ra * (t[-1] + 1) + rb)

# Second approach: intersect the two filtered value sets. intersect1d returns
# unique values, so repeated prices are counted once here.
a1 = c[c > support]
a2 = c[c < resistance]
print("Number of points between bands 2nd approach", len(np.intersect1d(a1, a2)))

plot(t, c)
plot(t, support)
plot(t, resistance)
show()

3.30 动手实践:数组的修剪和压缩

clip方法返回一个修剪过的数组,也就是将所有比给定最大值还大的元素全部设为给定的最大值,而所有比给定最小值还小的元素全部设为给定的最小值。

a = np.arange(5)
print("a =", a)
# Values below 1 become 1 and values above 2 become 2.
print("Clipped", a.clip(1, 2))

compress方法返回一个根据给定条件筛选后的数组

a = np.arange(4)
print(a)
# Keep only the elements for which the condition is True.
print("Compressed", a.compress(a > 2))

prod方法,可以计算数组中所有元素的乘积

b = np.arange(1, 9)
print("b =", b)
# Product of 1..8, i.e. 8!.
print("Factorial", b.prod())

调用cumprod方法,计算数组元素的累积乘积

# NOTE: `b` must already be defined by the surrounding code.
# Cumulative product: element k is the product of b[0..k].
print("Factorials", b.cumprod())