Numpy

来源:互联网 发布:caffe deploy.proto 编辑:程序博客网 时间:2024/06/06 01:03

一种多维数组对象

一、创建ndarray

#array函数接受一切序列型对象(包括其他数组),并产生一个新的含有传入数据的NumPy数组。import numpy as npdata = [6,7.5,8,0,1]arry = np.array(data)
arry.dtype
dtype('float64')
#嵌套的等长列表将被转换为一个多维数组:data1 = [[1,2,3,4],[2,3,4,5]]arry1 = np.array(data1)arry1
array([[1, 2, 3, 4],       [2, 3, 4, 5]])
arry1.shape
(2, 4)
arry1.dtype
dtype('int32')

其他创建数组的函数

np.zeros((3,2))#创建全为0的数组
array([[ 0.,  0.],       [ 0.,  0.],       [ 0.,  0.]])
np.empty((2,3))#创建一个没有任何具体值的数组,通常返回的是未初始化的垃圾值
array([[ 0.,  0.,  0.],       [ 0.,  0.,  0.]])

arange 是 Python 内置函数 range 的数组版本

np.arange(10)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

二、ndarray的数据类型

#dtype是一个特殊的对象,它含有ndarray将一块内存解释为指定数据类型所需的全部信息。arr=np.array([1,2,3,4],dtype=np.float64)arr1 = np.array([1,2,3,4],dtype=np.int32)print(arr.dtype)print(arr1.dtype)
float64int32

可以通过ndarray的astype方法显式地转换其dtype:

arr1=np.array([1,2,3,4,5,6])arr1.dtype
dtype('int32')
a = arr1.astype(np.float64)
a.dtype
dtype('float64')
a
array([ 1.,  2.,  3.,  4.,  5.,  6.])

还可以直接使用其它数组的dtype作为转换的目标类型

int_arr=np.arange(10)float_arr = np.array([.2,.13,.356,.123],dtype=np.float64)int_arr.dtype
dtype('int32')
float_arr.dtype
dtype('float64')
a = int_arr.astype(float_arr.dtype)#astype总会创建出一个新的数组(原始数据的一份拷贝),即使新dtype和老dtype相同也是如此a.dtype
dtype('float64')

三、数组和标量之间的运算

arr = np.array([[1,2,3,4,5],[2,3,4,5,6]])arr*arr
array([[ 1,  4,  9, 16, 25],       [ 4,  9, 16, 25, 36]])
1/arr
array([[ 1.        ,  0.5       ,  0.33333333,  0.25      ,  0.2       ],       [ 0.5       ,  0.33333333,  0.25      ,  0.2       ,  0.16666667]])

四、基本的索引和切片

一维数组类似于列表的切片

arr = np.arange(10)arr[5]
5
arr[5:8]=12#将标量值赋给切片时,该值会自动传播(广播)到整个选区arr
array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])
arr_slice = arr[5:8]arr_slice[1] = 12345arr
array([    0,     1,     2,     3,     4,    12, 12345,    12,     8,     9])
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])arr2d[2]
array([7, 8, 9])
#选取单个元素arr2d[0,2]
3
#如果省去逗号后面的索引则返回对象是一个维度低一点的数组arr2d[0]
array([1, 2, 3])
arr3d=np.array([[[1,2,3],[2,3,4]],[[1,2,3],[2,3,4]]])arr3d
array([[[1, 2, 3],        [2, 3, 4]],       [[1, 2, 3],        [2, 3, 4]]])

标量值和数组值都可以赋值给arr3d[0]

arr3d=np.array([[[1,2,3],[2,3,4]],[[1,2,3],[2,3,4]]])old_values=arr3d[0].copy() arr3d[0]=42 arr3d
array([[[42, 42, 42],        [42, 42, 42]],       [[ 1,  2,  3],        [ 2,  3,  4]]])
arr3d[1,0]
array([1, 2, 3])

五、切片索引

#一维数组的切片和列表相似,#高维数组的切片花样更多可以在一个或者多个轴上进行切片,也可以与整数索引混合使用。#多维数组可以一次传入多个切片,就像传入多个索引一样。arr2d= np.array([[1,2,3,4],[2,3,4,5],[3,4,5,6]])arr2d[:2,1:]
array([[2, 3, 4],       [3, 4, 5]])
#通过将整数索引和切片混合,可以得到低维度的切片:arr2d[1,:2]
array([2, 3])
arr2d[2,:1]
array([3])
#"只有冒号"表示选取整个轴,因此你可以像下面这样只对高维轴进行切片:arr2d[:,:1]
array([[1],       [2],       [3]])

六、布尔型索引

import numpy as npfrom numpy.random import randnnames = np.array(['Bob','Joe','Will','Bob','Will','Joe','Joe'])data = randn(7,4)data
array([[ 0.01692542,  1.62604368,  0.70260541, -0.17921946],       [-0.32215765, -0.96035339,  0.28138802, -0.54872738],       [-0.23851212,  0.54054555,  1.09421485, -0.42092751],       [-0.17245937, -0.00616908,  0.26557359,  1.01388004],       [ 1.15670777, -0.5679677 , -1.57703592,  1.52332383],       [-0.73310924,  2.44377133, -0.02362226,  0.58656418],       [-1.12137563,  1.37417378, -0.11284187, -0.02534613]])
#数组的比较运算(如==)也是矢量化的,因此对names和字符串“Bob”的比较运算将会产生一个布尔型数组:names == "Bob"
array([ True, False, False,  True, False, False, False], dtype=bool)
#可以将布尔型数组与切片、整数混合使用:data[names == "Bob",:2]
array([[ 0.01692542,  1.62604368],       [-0.17245937, -0.00616908]])
data[names != "Bob",3]
array([-0.54872738, -0.42092751,  1.52332383,  0.58656418, -0.02534613])
#通过布尔型索引选取数组中的数据,将总是创建数据的副本,即使返回一模一样的数组也是如此。#要选取多个名字对应的数据,组合使用&(和)、|(或)之类的布尔运算符即可。mask = (names == "Bob")|(names == "Will")mask
array([ True, False,  True,  True,  True, False, False], dtype=bool)
data[mask]
array([[ 0.01692542,  1.62604368,  0.70260541, -0.17921946],       [-0.23851212,  0.54054555,  1.09421485, -0.42092751],       [-0.17245937, -0.00616908,  0.26557359,  1.01388004],       [ 1.15670777, -0.5679677 , -1.57703592,  1.52332383]])

通过布尔型数组设置值是一种常用的手段

#将data中的所有小于0的数都设置成0data[data < 0] = 0data
array([[ 0.01692542,  1.62604368,  0.70260541,  0.        ],       [ 0.        ,  0.        ,  0.28138802,  0.        ],       [ 0.        ,  0.54054555,  1.09421485,  0.        ],       [ 0.        ,  0.        ,  0.26557359,  1.01388004],       [ 1.15670777,  0.        ,  0.        ,  1.52332383],       [ 0.        ,  2.44377133,  0.        ,  0.58656418],       [ 0.        ,  1.37417378,  0.        ,  0.        ]])
data[names!="Joe"]=3
data
array([[ 3.        ,  3.        ,  3.        ,  3.        ],       [ 0.        ,  0.        ,  0.28138802,  0.        ],       [ 3.        ,  3.        ,  3.        ,  3.        ],       [ 3.        ,  3.        ,  3.        ,  3.        ],       [ 3.        ,  3.        ,  3.        ,  3.        ],       [ 0.        ,  2.44377133,  0.        ,  0.58656418],       [ 0.        ,  1.37417378,  0.        ,  0.        ]])
data
array([[ 3.        ,  3.        ,  3.        ,  3.        ],       [ 0.        ,  0.        ,  0.28138802,  0.        ],       [ 3.        ,  3.        ,  3.        ,  3.        ],       [ 3.        ,  3.        ,  3.        ,  3.        ],       [ 3.        ,  3.        ,  3.        ,  3.        ],       [ 0.        ,  2.44377133,  0.        ,  0.58656418],       [ 0.        ,  1.37417378,  0.        ,  0.        ]])

七、花式索引

#花式索引是numpy的一个术语,它指的是利用整数数组进行索引。假设我们有一个8×4的数组:arr = np.empty((8,4))for i in range(8):    arr[i]=iarr
array([[ 0.,  0.,  0.,  0.],       [ 1.,  1.,  1.,  1.],       [ 2.,  2.,  2.,  2.],       [ 3.,  3.,  3.,  3.],       [ 4.,  4.,  4.,  4.],       [ 5.,  5.,  5.,  5.],       [ 6.,  6.,  6.,  6.],       [ 7.,  7.,  7.,  7.]])
arr[[2,4,1]]#选取索引为2,4,1的数据并形成新的数组。
array([[ 2.,  2.,  2.,  2.],       [ 4.,  4.,  4.,  4.],       [ 1.,  1.,  1.,  1.]])
arr[[-1,-2,-4]]#负数的时候从尾部选取
array([[ 7.,  7.,  7.,  7.],       [ 6.,  6.,  6.,  6.],       [ 4.,  4.,  4.,  4.]])
arr[[2,3],[1,2]]
array([ 2.,  3.])
arr = np.arange(32).reshape((8,4))arr
array([[ 0,  1,  2,  3],       [ 4,  5,  6,  7],       [ 8,  9, 10, 11],       [12, 13, 14, 15],       [16, 17, 18, 19],       [20, 21, 22, 23],       [24, 25, 26, 27],       [28, 29, 30, 31]])
arr[[1,5,7,2],[0,3,1,2]]
array([ 4, 23, 29, 10])
#np.ix_函数可以用来将两个一维数组转换为一个用于选取方形区域的索引器:arr[np.ix_([1,3,0],[2,1])]
array([[ 6,  5],       [14, 13],       [ 2,  1]])

花式索引与切片不一样的地方在于,花式索引总是将数据复制到新数组中。

八、数组转置和轴对换:T 和 transpose

转置返回的是源数据的视图(不会进行任何复制操作)。

arr = np.arange(15).reshape(5,3)arr
array([[ 0,  1,  2],       [ 3,  4,  5],       [ 6,  7,  8],       [ 9, 10, 11],       [12, 13, 14]])
arr.T
array([[ 0,  3,  6,  9, 12],       [ 1,  4,  7, 10, 13],       [ 2,  5,  8, 11, 14]])
#在进行矩阵运算时经常用到此操作,比如利用np.dot计算矩阵内积$X^TX$:arr = np.random.randn(6,3)
arr
array([[-0.72482924, -0.15887863,  0.12820258],       [-1.10691913,  0.58879058,  0.33353862],       [-1.62076341, -2.24148168, -1.07663131],       [ 1.25201958,  1.03908446, -1.15584852],       [-1.03521759, -0.32247311, -0.2021625 ],       [ 0.85230989,  1.74023867, -0.89794081]])
np.dot(arr.T,arr)
array([[ 7.74318206,  6.21433438, -0.72034726],       [ 6.21433438,  9.60827296, -0.10919847],       [-0.72034726, -0.10919847,  3.46997205]])

对于高维数组,transpose()需要传入一个由轴编号组成的元组,才能按指定顺序对这些轴进行转置;swapaxes也可以用来转置,它需要接受一对轴编号。

arr = np.arange(16).reshape(2,2,4)arr.transpose(1,0,2)
array([[[ 0,  1,  2,  3],        [ 8,  9, 10, 11]],       [[ 4,  5,  6,  7],        [12, 13, 14, 15]]])

九、通用函数:快速的元素级数组函数

arr = np.arange(10)np.sqrt(arr)
array([ 0.        ,  1.        ,  1.41421356,  1.73205081,  2.        ,        2.23606798,  2.44948974,  2.64575131,  2.82842712,  3.        ])
np.exp(arr)
array([  1.00000000e+00,   2.71828183e+00,   7.38905610e+00,         2.00855369e+01,   5.45981500e+01,   1.48413159e+02,         4.03428793e+02,   1.09663316e+03,   2.98095799e+03,         8.10308393e+03])
from numpy.random import randnx = randn(8)y = randn(8)x
array([ 0.28203301, -0.07892771,  0.81310924, -0.94331123, -0.25919552,        0.72150282,  0.53915419,  0.65178723])
y
array([ 1.04158533, -0.86644761, -0.85211761, -1.03775007, -0.93885207,       -1.04613844,  0.44875021, -0.78079688])
np.maximum(x,y)
array([ 1.04158533, -0.07892771,  0.81310924, -0.94331123, -0.25919552,        0.72150282,  0.53915419,  0.65178723])
points = np.arange(-5,5,0.01)xs,ys = np.meshgrid(points,points)ys
array([[-5.  , -5.  , -5.  , ..., -5.  , -5.  , -5.  ],       [-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],       [-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],       ...,        [ 4.97,  4.97,  4.97, ...,  4.97,  4.97,  4.97],       [ 4.98,  4.98,  4.98, ...,  4.98,  4.98,  4.98],       [ 4.99,  4.99,  4.99, ...,  4.99,  4.99,  4.99]])
import matplotlib.pyplot as pltz = np.sqrt(xs**2 +ys**2)z
array([[ 7.07106781,  7.06400028,  7.05693985, ...,  7.04988652,         7.05693985,  7.06400028],       [ 7.06400028,  7.05692568,  7.04985815, ...,  7.04279774,         7.04985815,  7.05692568],       [ 7.05693985,  7.04985815,  7.04278354, ...,  7.03571603,         7.04278354,  7.04985815],       ...,        [ 7.04988652,  7.04279774,  7.03571603, ...,  7.0286414 ,         7.03571603,  7.04279774],       [ 7.05693985,  7.04985815,  7.04278354, ...,  7.03571603,         7.04278354,  7.04985815],       [ 7.06400028,  7.05692568,  7.04985815, ...,  7.04279774,         7.04985815,  7.05692568]])
plt.imshow(z,cmap=plt.cm.gray);plt.colorbar()plt.title("Image plot of $\sqrt{x^2+y^2}$ for a grid of values")plt.show()

png

十、将条件逻辑表示为数组运算

xarr = np.array([1.1,1.2,1.3,1.4,1.5])yarr = np.array([2.1,2.2,2.3,2.,2.5])cond = np.array({True,False,True,False,True})result=np.where(cond,xarr,yarr)result
array([ 1.1,  1.2,  1.3,  1.4,  1.5])
#将一个随机生成的矩阵的所有负值改为2,所有正值改为-2:arr = randn(4,4)arr
array([[ 0.38624386, -1.00098369,  0.12529794,  0.06718387],       [-0.86212575,  0.79334746, -1.14065487, -0.11984418],       [ 0.22126195, -1.12186889,  0.16895321, -0.28376501],       [ 1.54402042,  1.72169063, -0.13042339, -1.22360467]])
result = np.where(arr<0,2,-2)
result
array([[-2,  2, -2, -2],       [ 2, -2,  2,  2],       [-2,  2, -2,  2],       [-2, -2,  2,  2]])
#仅将正值设为2result1 = np.where(arr<0,arr,2)
result1
array([[ 2.        , -1.00098369,  2.        ,  2.        ],       [-0.86212575,  2.        , -1.14065487, -0.11984418],       [ 2.        , -1.12186889,  2.        , -0.28376501],       [ 2.        ,  2.        , -0.13042339, -1.22360467]])
arr = np.random.randn(5,4)arr
array([[ 0.95318109,  0.33850616, -0.1578126 ,  0.47392647],       [-0.62539985,  0.19150499,  0.6193759 , -0.42246954],       [ 0.28917353,  1.26054929,  0.51536637, -0.26027762],       [-0.80810081, -0.94702621, -0.55558203, -0.71978358],       [ 1.52878853,  0.43427938,  0.8075557 ,  0.4402416 ]])
arr.mean()
0.16779983922531791

十一、用于布尔型数组的方法

# 由于布尔型值在这些函数中会被强制转换为1和0,因此sum经常被用来计算布尔型数组中真值的数量。arr=randn(100)(arr>0).sum()
52
#any 和 all 分别用来检测数组中是否存在真值以及是否全部为真值:arr = randn(5,4)bools = np.where(arr>0,False,True)
array([[False,  True, False, False],       [ True,  True,  True,  True],       [ True, False, False, False],       [ True,  True,  True, False],       [False,  True,  True, False]], dtype=bool)
bools.any()
True

十二、排序

1、可以利用sort进行排序

arr = randn(8)
arr.sort()arr
array([-0.99944632, -0.65379151, -0.47410941, -0.19855683, -0.03635958,        0.42783182,  0.71936697,  1.20869883])
# 多维数组可以在任何方向上排序arr = randn(5,3)
arr
array([[ 1.79681182,  0.47185016,  0.23493617],       [ 0.02554242, -0.52912986, -0.22851694],       [ 0.72132674, -0.58927883,  0.44211988],       [ 1.15956262, -0.69075716,  1.70492523],       [-0.07949838, -1.16388945,  0.12913133]])
arr.sort(0)arr
array([[-1.16388945, -0.22851694,  0.02554242],       [-0.69075716, -0.07949838,  0.12913133],       [-0.58927883,  0.44211988,  0.72132674],       [-0.52912986,  0.47185016,  1.70492523],       [ 0.23493617,  1.15956262,  1.79681182]])

十三、唯一化以及其他的集合逻辑

np.unique()——找出数组中的唯一值并返回已排序的结果;np.in1d()——测试一个数组中的值在另一个数组中的成员资格,返回一个布尔型数组;np.intersect1d()——计算两个数组的交集并返回有序结果。

ints= np.array([1,2,3,4,0.3,0.1,0.2,1,3,4])result = np.unique(ints)result
array([ 0.1,  0.2,  0.3,  1. ,  2. ,  3. ,  4. ])
values = np.array([3,4,6,3,2,1,5,7,5,4,3])np.intersect1d(values,[2,3,6])
array([2, 3, 6])

十四、用于数组的文件输入输出

result = np.loadtxt('E:\\Metamap\\MetaMap_analysis\\evaluation\\percentage\\data.csv',delimiter = ',')
result
array([[ 0.62,  0.66],       [ 0.59,  0.47],       [ 0.59,  0.81],       [ 0.61,  0.86],       [ 0.59,  0.81],       [ 0.4 ,  0.71],       [ 0.19,  0.39],       [ 0.6 ,  0.92],       [ 0.54,  0.81],       [ 0.41,  0.87],       [ 0.45,  1.  ],       [ 0.46,  0.74],       [ 0.48,  0.79],       [ 0.34,  0.44],       [ 0.7 ,  0.74],       [ 0.71,  0.78],       [ 0.45,  0.62],       [ 0.42,  0.66],       [ 0.36,  0.44],       [ 0.66,  0.7 ],       [ 0.45,  0.76],       [ 0.33,  0.62],       [ 0.75,  0.68],       [ 0.7 ,  0.84],       [ 0.32,  0.7 ],       [ 0.17,  0.24],       [ 0.3 ,  0.64],       [ 0.48,  0.61],       [ 0.2 ,  0.42],       [ 0.24,  0.59],       [ 0.44,  0.64],       [ 0.36,  0.61],       [ 0.54,  0.74],       [ 0.48,  0.75],       [ 0.42,  0.6 ],       [ 0.3 ,  0.57],       [ 0.48,  0.68],       [ 0.46,  0.63],       [ 0.65,  0.96]])
y = np.ones((3, 1))x = np.array([[1,2,3],[4,5,6]])np.dot(x,y)
array([[  6.],       [ 15.]])
x.shape
(2, 3)
y
array([[ 1.],       [ 1.],       [ 1.]])
from numpy.linalg import inv,qrt = randn(5,5)mat= t.T.dot(t)
inv(mat)
array([[ 32.74779219,  -6.78247905,   1.90087517, -19.68171336,         20.52736017],       [ -6.78247905,   1.67553732,  -0.45118898,   4.30818545,         -4.36960428],       [  1.90087517,  -0.45118898,   0.39336663,  -0.95285319,          0.97778041],       [-19.68171336,   4.30818545,  -0.95285319,  12.56330827,        -12.94342001],       [ 20.52736017,  -4.36960428,   0.97778041, -12.94342001,         13.51915338]])
mat.dot(inv(mat))
array([[  1.00000000e+00,   2.66453526e-15,  -1.11022302e-15,          3.55271368e-15,  -7.10542736e-15],       [  0.00000000e+00,   1.00000000e+00,   0.00000000e+00,          0.00000000e+00,  -1.42108547e-14],       [  1.06581410e-14,   8.88178420e-16,   1.00000000e+00,          1.77635684e-15,   1.77635684e-15],       [ -2.84217094e-14,   0.00000000e+00,  -8.88178420e-16,          1.00000000e+00,   1.42108547e-14],       [  2.84217094e-14,   0.00000000e+00,   1.77635684e-15,          0.00000000e+00,   1.00000000e+00]])
t
array([[  1.00000000e+00,   3.55271368e-15,  -3.55271368e-15,         -3.55271368e-15,  -7.10542736e-15],       [  0.00000000e+00,   1.00000000e+00,  -7.10542736e-15,          7.10542736e-15,   1.42108547e-14],       [ -1.77635684e-15,   2.84217094e-14,   1.00000000e+00,          0.00000000e+00,   5.68434189e-14],       [  6.66133815e-16,   7.10542736e-15,   3.55271368e-15,          1.00000000e+00,   1.42108547e-14],       [  2.22044605e-16,   0.00000000e+00,  -3.55271368e-15,          1.42108547e-14,   1.00000000e+00]])

十五、随机数生成

from numpy.random import randnrandn(10,10)
array([[ 1.13632612, -1.45881242, -1.00592461,  1.28686884, -1.26162204,        -0.62133433, -1.12341765, -0.53300163, -1.2454125 , -0.04534438],       [-0.26156748,  0.83571917,  0.10441261, -0.24060883,  0.20228283,         1.76894401, -0.67900176, -0.49805108,  0.61687765, -1.25980762],       [-1.15173729,  0.32734462, -1.03821865,  1.28775835, -0.07132051,         0.46049233, -0.5281878 ,  0.26285282,  0.49567833,  1.31493268],       [-2.08738882,  2.04372852,  0.24664217, -0.59139293, -0.58268641,         0.5278138 , -0.19005287, -0.4150044 ,  0.82098552, -0.80051365],       [ 0.05032985, -1.21115779,  0.027123  ,  0.24805999, -0.87726063,        -2.01558966, -0.44453935,  0.93227665, -0.10701501,  0.78539994],       [-0.84233685, -0.3097025 , -1.07596246,  0.34408001, -0.83077666,         1.08542034,  1.52935916, -0.78211128, -1.39408413, -0.75259018],       [-0.43803443,  0.05634459, -0.72569903, -2.26118029,  1.0082606 ,         0.02478734, -1.87761307, -0.43614829,  0.98382574, -2.27732961],       [ 0.92350553,  1.61498294, -0.24545321,  0.71119926, -0.64213654,        -0.078434  ,  1.36862653, -0.24849693, -0.16676013, -0.06395134],       [-2.52244745, -1.27448594, -1.14716814, -0.03065553, -0.21178795,         0.59812425, -1.33649615, -1.73688871,  0.78922909,  1.76343392],       [ 1.14032151,  0.88458959, -0.41389419, -0.06406893, -0.96833927,        -1.41763581,  0.10250839,  1.17142435,  0.82599246, -0.07918591]])

十六、范例:随机漫步