Python 中的numpy 库

来源：互联网发布：西厢后弦知乎编辑：程序博客网时间：2024/04/27 14:33

这里写图片描述

待整理的

Numpy & Pandas

Pandas数据转为 numpy数据

df_numpyMatrix = df.as_matrix()  # 旧写法：pandas 0.23 起弃用，1.0 起已移除
df_numpyMatrix = df.values       # 推荐写法，也可以用 df.to_numpy()

a=([3.234,34,3.777,6.33])#a为python的list类型#将a转化为numpy的array:  np.array(a)array([  3.234,  34.   ,   3.777,   6.33 ])

#将numpy数组转化回python的list（a需先转为数组，如 a = np.array(a)）
a.tolist()

python中list与array互相转换

u = array([[1,2],[3,4]])m = u.tolist()   #转换为listm.remove(m[0])    #移除m[0]m = np.array(m)    #转换为array

NumPy是一个数值计算库，它为Python带来了真正的多维数组功能，并且提供了丰富的函数库处理这些数组。它将常用的数学函数都进行数组化，使得这些数学函数能够直接对数组进行操作。NumPy为Python提供了快速的多维数组处理的能力，但它本身并不包含积分、优化、信号处理等更高层的科学计算功能，这时候就需要scipy了。

NumPy小抄

#array initialization
import numpy as np
np.array([2, 3, 4])
np.empty(20, dtype=np.float32)
np.zeros(200)
np.ones((3,3), dtype=np.int32)
np.eye(200)
np.zeros_like(a)
np.linspace(0., 10., 100)  # 100 points from 0 to 10
np.arange(0, 100, 2)
np.logspace(-5, 2, 100)    # 100 log-spaced points between 10**-5 and 10**2
np.copy(a)                 # copy array to new memory
#np.clip的数学本质是一种截断：np.clip(a, a_min, a_max) 中 a 是待处理的数组，a_min 表示目标区间最小值，a_max 表示最大值，原始序列凡小于这个最小值的被这个最小值所替换，凡大于这个最大值的被这个最大值替换；两个边界至少要指定一个，未指定（None）的一侧不做截断，相当于该侧边界为∞。
np.clip(a, a_min, a_max)

import numpy as nparray = np.array([[1,2,3],[2,3,4]])print(array)print('number of dim:',array.ndim) #矩阵的维度print('shape:',array.shape) # 矩阵的大小print('size:',array.size) #矩阵的元素个数

# 数组的type
a = np.array([2,3,4],dtype=np.int64)
print(a.dtype)
a = np.array([2,3,4],dtype=float)  # default is float64；np.float 别名已在 NumPy 1.24 中移除，直接用内置的 float
print(a.dtype)
a = np.array([2,3,4],dtype=np.float32)
print(a.dtype)

# 创建各种各样的array矩阵和数组
a = np.array([2,3,5])
print(a)
a = np.zeros((3,4),dtype=np.int32) #生成一个零矩阵
a = np.ones((3,4),dtype=np.int64)# 生成一个全1矩阵（单位矩阵要用np.eye）
a = np.empty((3,4))# 生成一个未初始化的矩阵，元素值是内存中的任意值
a = np.arange(10,20,2)#生成一个数列，从10到20（不含20），步长为2,有序数列
a = np.arange(12).reshape((3,4)) #生成一个矩阵，可定义shape
a = np.linspace(1,10,20).reshape((4,5)) #生成一个从1到10的等间隔数列，共20个点，再变形为4x5的有序矩阵。
class_pred = np.array([])
class_actual=np.array([])
ss = np.array([]).reshape(-1,2)
array([], dtype=float64)
array([], dtype=float64)
array([], shape=(0, 2), dtype=float64)
print(class_pred.shape, type(class_pred), class_actual.shape, type(class_actual), ss.shape, type(ss))
((0,), <type 'numpy.ndarray'>, (0,), <type 'numpy.ndarray'>, (0, 2), <type 'numpy.ndarray'>)

Reading/ Writing files

np.fromfile(fname/object, dtype=np.float32, count=5)np.loadtxt(fname/object, skiprows=2, delimiter=',')

indexing

a = np.arange(100)a[:3] = 0a[2:5] = 1  #set indices 2-4 to 1a[start:stop:step]a[None, :]a[[1, 1, 3, 8]]a = a.reshape(10, 10)  # transform to 10 x 10 matrixa.T           #return transposed viewb = np.transpose(a, (1,0))a[a<2]

#索引
import numpy as np
A =np.arange(2,14).reshape((3,4))
print(A)
print(np.argmin(A))
print(np.argmax(A))
print(np.mean(A))
print(A.mean())
print(np.average(A))
print(np.median(A))
print(A)
print(np.cumsum(A)) #逐步累加
print(np.diff(A)) #每一行中相邻两个元素之差（累差）
print(np.nonzero(A))
print(np.sort(A))
print(np.transpose(A)) #矩阵的转置
print(A.T)
print((A.T).dot(A)) # A'A
print(np.clip(A,5,9)) #矩阵的截断，小于5为5，大于9的为9
import numpy as np
A = np.arange(3,15)
print(A)
print(A[3])
A = np.arange(3,15).reshape((3,4))
print(A)
print(A[2]) #输出下标为2的一行（下标从0开始；矩阵只有3行，A[3]会越界报错）
#输出1行1列
print(A[1][1])
print(A[1,1])
print(A[2,:]) #2行
print(A[:,1])#1列
print(A[1,1:3])
#输出每一行
for row in A:
    print(row)
# 输出每一列
for column in A.T:
    print(column)
#输出每一个元素
print(A.flatten()) #返回所有值
for item in A.flat:
    print(item)

array properties and operations

a.shapelen(a)a.ndima.sort(axis=1)a.flatten()a.conj()a.astype(np.int16)#numpy中的数据类型转换，不能直接改原数据的dtype!  只能用函数astype()np.argmax(a, axis=1)np.cumsum(a)np.any(a)np.all(a)np.argsort(a, axis =1)np.linalg.norm顾名思义，linalg=linear+algebra，norm则表示范数，首先需要注意的是范数是对向量（或者矩阵）的度量，是一个标量（scalar）：

numpy数据类型dtype转换，astype

>>> a = np.random.random(4)
>>> a
array([ 0.0945377 ,  0.52199916,  0.62490646,  0.21260126])
>>> a.dtype
dtype('float64')
>>> a.shape
(4,)
#改变dtype，发现数组长度翻倍！（直接改dtype只是把同一块内存按新类型重新解释：每个8字节的float64被当成两个4字节的float32，数值也随之变成无意义的数）
>>> a.dtype = 'float32'
>>> a
array([  3.65532693e+20,   1.43907535e+00,  -3.31994873e-25,
         1.75549972e+00,  -2.75686653e+14,   1.78122652e+00,
        -1.03207532e-19,   1.58760118e+00], dtype=float32)
>>> a.shape
(8,)
#用 astype(int) 得到整数，并且不改变数组长度
b = np.array([1., 2., 3., 4.])
print(b.dtype, b.shape, b)
c = b.astype(int)
print(c.dtype, c.shape, c)
(dtype('float64'), (4,), array([ 1.,  2.,  3.,  4.]))
(dtype('int64'), (4,), array([1, 2, 3, 4]))

详细参考见numpy数据类型dtype转换

x = np.array([3, 4])
np.linalg.norm(x)      #  默认二范数
5.
np.linalg.norm(x, ord=2) #二范数  ℓ2：sqrt(x1^2+x2^2+…+xn^2)
5.
np.linalg.norm(x, ord=1)  # 1 范数    ℓ1：|x1|+|x2|+…+|xn|
7.
np.linalg.norm(x, ord=np.inf) #无穷范数 ℓ∞：max(|xi|)
4.

范数理论的一个小推论告诉我们：ℓ1≥ℓ2≥ℓ∞

boolean arrays

a < 2(a < 2) & (b > 10) # elementwise logical and(a < 2) | (b > 10) # elementwise logical or~ a                # invert boolean array

# elementwise operations and math functions

a*2a + 5a + ba / bnp.exp(a)        # exponential (complex and real)np.power(a, b)   # a to the power bnp.sin(a)np.cos(a)np.arctan2(a,b)np.arcsin(a)np.radians(a) # degrees to radiansnp.degrees(a)  # radians to degreesnp.var(a)    #variance of arraynp.std(a, axis=1)  #standard deviation

# numpy 中的基础运算
import numpy as np
a = np.array([10,20,30,40])
b = np.arange(4)
print(a,b)
c = a + b
c = b**2  # b^2
c = b**4  # b^4
c = 10*np.sin(a)
c = 10*np.cos(b)
c = 10*np.tan(a)
print(c)
print(b)
print(b<3)
print(b==3)
#矩阵的运算
import numpy as np
a = np.array([[1,1],[0,1]])
b = np.arange(4).reshape((2,2))  # 4个元素只能变形为2x2；reshape((2,3))会报错
print(a,b)
#逐个乘法
c = a*b
c_dot=np.dot(a,b) #矩阵乘法
c_dot2=a.dot(b)#矩阵乘法
print(c)
print(c_dot)
print(c_dot2)
import numpy as np
a =np.random.random((2,4)) # 随机生成的矩阵，元素取自0到1之间（[0,1)）
print(np.sum(a))
print(np.min(a))
print(np.max(a))
print(np.sum(a,axis=1)) #对行计算
print(np.min(a,axis=0)) #对列中求最小值
print(np.max(a,axis =1))#对行中求最大值

numpy的array合并

import numpy as npA = np.array([1,1,1])B = np.array([2,2,2])C = np.vstack((A,B))# vertical stack 上下合并,竖直方向print(C)print(A.shape,C.shape)np.vstack([np.array([1, 2, 3]), np.array([4, 5, 6])])array([[1, 2, 3],       [4, 5, 6]])np.column_stack([np.array([1, 2, 3]), np.array([4, 5, 6])])array([[1, 4],       [2, 5],       [3, 6]])D = np.hstack((A,B)) # horizontal stack 左右合并,水平方向print(D)print(A.shape,D.shape)np.hstack([np.array([1, 2, 3]), np.array([4, 5, 6])])array([1, 2, 3, 4, 5, 6])# 两者近乎等效np.row_stack([np.array([1, 2, 3]), np.array([4, 5, 6])])array([[1, 2, 3],       [4, 5, 6]])print(A.T.shape)print(A[np.newaxis,:].shape)print(A[:,np.newaxis].shape)#多个合并C =np.concatenate((A,B,A,B,A,A))print(C)#行和列合并C = np.concatenate((A,B,A,B,A,A),axis=1)C = np.concatenate((A,B,A,B,A,A),axis=0)print(C)

np.concatenate((a, b), axis=0) == np.vstack((a, b))                    # 也对应于默认的情况，np.concatenate((a, b)) np.concatenate((a, b), axis=1) == np.hstack((a, b))

a = np.ndarray((3, 2, 3))b = np.ndarray((2, 2, 3))print(a.shape, b.shape)(3, 2, 3) (2, 2, 3)c = np.concatenate((a, b), axis = 0)print(c.shape)(5, 2, 3)

#append,矩阵的追加是采用append这个函数a = np.array([1, 2, 3, 4, 5])a = np.append(a, 10)array([ 1,  2,  3,  4,  5, 10])a = np.append(a, [1, 2, 3])array([ 1,  2,  3,  4,  5, 10,  1,  2,  3])

#列表的扩展(extend), 列表的扩展就是把两个列表合并,采用extend函数
a = [1, 2, 3, 4]
b = [5, 6, 7, 8]
c = a.extend(b)
c
a
[1, 2, 3, 4, 5, 6, 7, 8]
请注意extend这个函数的返回值是None，所以上面c的输出为空，而a的值已经变了，所以它是直接在a后面扩展的，并没有任何返回值。

#列表的追加直接用append就行a = [1, 2,3,4]a.append(6)[1, 2, 3, 4, 6]

numpy数组拼接方法介绍

#思路：首先将数组转成列表，然后利用列表的拼接函数append()、extend()等进行拼接处理，最后将列表转成数组>>> import numpy as np>>> a=np.array([1,2,5])>>> b=np.array([10,12,15])>>> a_list=list(a)>>> b_list=list(b)>>> a_list.extend(b_list)>>> a_list[1, 2, 5, 10, 12, 15]>>> a=np.array(a_list)>>> aarray([ 1,  2,  5, 10, 12, 15])该方法只适用于简单的一维数组拼接

#思路：numpy提供了numpy.append(arr, values, axis=None)函数。对于参数规定，要么一个数组和一个数值；要么两个数组，不能三个及以上数组直接append拼接。append函数返回的始终是一个一维数组。>>> a=np.arange(5)>>> aarray([0, 1, 2, 3, 4])>>> np.append(a,10)array([ 0,  1,  2,  3,  4, 10])>>> aarray([0, 1, 2, 3, 4])>>> b=np.array([11,22,33])>>> barray([11, 22, 33])>>> np.append(a,b)array([ 0,  1,  2,  3,  4, 11, 22, 33])#numpy的数组没有动态改变大小的功能，numpy.append()函数每次都会重新分配整个数组，并把原来的数组复制到新数组中。

#思路：numpy提供了numpy.concatenate((a1,a2,...), axis=0)函数。能够一次完成多个数组的拼接。其中a1,a2,...是数组类型的参数>>> a=np.array([1,2,3])>>> b=np.array([11,22,33])>>> c=np.array([44,55,66])>>> np.concatenate((a,b,c),axis=0)  # 默认情况下，axis=0可以不写array([ 1,  2,  3, 11, 22, 33, 44, 55, 66]) #对于一维数组拼接，axis的值不影响最后的结果>>> a=np.array([[1,2,3],[4,5,6]])>>> b=np.array([[11,21,31],[7,8,9]])>>> np.concatenate((a,b),axis=0)array([[ 1,  2,  3],       [ 4,  5,  6],       [11, 21, 31],       [ 7,  8,  9]])>>> np.concatenate((a,b),axis=1)  #axis=1表示对应行的数组进行拼接array([[ 1,  2,  3, 11, 21, 31],       [ 4,  5,  6,  7,  8,  9]])

numpy的array分割

import numpy as npA = np.arange(12).reshape((3,4))print(A)print(np.split(A,2,axis=1))#列，纵向print(np.split(A,3,axis=0))#行分割print(np.split(A,3,axis=1))# 不可分割成不相等的分割，列方向#不等的分割print(np.array_split(A,3,axis=1))#列不等的分割print(np.vsplit(A,3)) # 纵向分割，行分割print(np.hsplit(A,2)) #横向分割,列分割

numpy 的copy

#numpy的赋值有关联,也就是说a,b,c,d任何一个值变化，其他值也会有变化import numpy as npa = np.arange(4)#有序数列b = ac = ad = bd[1:3] =[22,33]d is a b is a #不关联的赋值b = a.copy()  #deep copyprint(b)a[3]=44print(a,b)

inner/ outer products

np.dot(a, b)np.einsum('ij,kj->ik', a, b)np.sum(a, axis=1)np.abs(a)a[None, :] + b[:, None]a[None, :] * b[:, None]np.outer(a, b)np.sum(a * a.T)

interpolation, integration

np.trapz(a, x=x, axis=1)np.interp(x, xp, yp)

fft

np.fft.fft(a)f = np.fft.fftfreq(len(a))np.fft.fftshift(f)np.fft.rfft(a)np.fft.rfftfreq(len(a))

rounding

np.ceil(a)    # rounds up to the nearest integer
np.floor(a)   # rounds down to the nearest integer
np.round(a)   # rounds to the nearest integer (halves go to the nearest even value)

random variables

np.random.normal(loc=0, scale=2, size=100)np.random.seed(23032)np.random.rand(200)np.random.uniform(1, 30, 200)np.random.randint(1, 16, 300)

本文中的 numpy.ipynb 格式可见我的CSDN下载。

#https://python.freelycode.com/contribution/detail/340#http://www.datadependence.com/2016/05/scientific-python-numpy/#Python科学计算之NumPy#NumPy是Python用于处理大型矩阵的一个速度极快的数学库。#它允许你在Python中做向量和矩阵的运算，而且很多底层的函数都是用C写的，#你将获得在普通Python中无法达到的运行速度。

#数组基础
#NumPy的功能围绕着一种叫数组的东西。其实准确的名字是ndarray，但我们并不需要在意这一点。
#有了这些数组，我们可以做各种有用的东西，如快速处理向量和矩阵数学。
#创建一个数组有4种不同的方法。最基本的方法就是传递一个序列给NumPy的array()函数；
#你可以传递给它任何的序列，不仅仅是你通常看到的列表。
import numpy as np
# 1D Array
a = np.array([0, 1, 2, 3, 4])
b = np.array((0, 1, 2, 3, 4))
c = np.arange(5)
d = np.linspace(0, 2*np.pi, 5)
print(a)
print(b)
print(c)
print(d)
print(a[3])

[0 1 2 3 4][0 1 2 3 4][0 1 2 3 4][ 0.          1.57079633  3.14159265  4.71238898  6.28318531]3

#上面的数组例子是如何用NumPy表示一个向量。#接下来我们就来看看我们如何可以展示矩阵和更多的多维数组。aa = np.array([[11, 12, 13, 14, 15],               [16, 17, 18, 19, 20],               [21, 22, 23, 24, 25],               [26, 27, 28, 29, 30],               [31, 32, 33, 34, 35]])print(aa)print(aa[2,4])

[[11 12 13 14 15] [16 17 18 19 20] [21 22 23 24 25] [26 27 28 29 30] [31 32 33 34 35]]25

#向量是同时具有方向和大小的量。#矩阵类似于向量，除了它是由行和列组成外，更像一个网格。矩阵中的值可以通过给对应的行和列来引用#多维数组切片import numpy as np# MD slicingprint(aa[0, 1:4])print(aa[1:4, 0])print(aa[::2, ::2])print(aa[:, 1])

[12 13 14][16 21 26][[11 13 15] [21 23 25] [31 33 35]][12 17 22 27 32]

#数组属性aa = np.array([[11, 12, 13, 14, 15],               [16, 17, 18, 19, 20],               [21, 22, 23, 24, 25],               [26, 27, 28, 29, 30],               [31, 32, 33, 34, 35]])print(type(aa))print(aa.dtype)print(aa.size)print(aa.shape)print(aa.itemsize)  #“itemsize”属性是每项占用了多少个字节,int64有64位，每个字节8位print(aa.ndim)      #“ndim”属性是指某个数组是几维的print(aa.nbytes)    #“nbytes”属性是某个数组中所有数据所占用的字节

<class 'numpy.ndarray'>
int64
25
(5, 5)
8
2
200

#处理数组#基本操作# Basic Operatorsaaa = np.arange(25)print(aaa)aaa = aaa.reshape((5, 5))print(aaa)bbb = np.array([10, 62, 1, 14, 2, 56, 79, 2, 1, 45,               4, 92, 5, 55, 63, 45, 45, 3, 123, 45,               34,5,67, 23,456])print(bbb)bbb = bbb.reshape((5,5))print(bbb)print( aaa + bbb)print(aaa -bbb)print(aaa*bbb)print(aaa/bbb)print(aaa**2)print(aaa<bbb)print(aaa>bbb)print(aaa.dot(bbb))  #dot()函数计算出两个数组的点积

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24][[ 0  1  2  3  4] [ 5  6  7  8  9] [10 11 12 13 14] [15 16 17 18 19] [20 21 22 23 24]][ 10  62   1  14   2  56  79   2   1  45   4  92   5  55  63  45  45   3 123  45  34   5  67  23 456][[ 10  62   1  14   2] [ 56  79   2   1  45] [  4  92   5  55  63] [ 45  45   3 123  45] [ 34   5  67  23 456]][[ 10  63   3  17   6] [ 61  85   9   9  54] [ 14 103  17  68  77] [ 60  61  20 141  64] [ 54  26  89  46 480]][[ -10  -61    1  -11    2] [ -51  -73    5    7  -36] [   6  -81    7  -42  -49] [ -30  -29   14 -105  -26] [ -14   16  -45    0 -432]][[    0    62     2    42     8] [  280   474    14     8   405] [   40  1012    60   715   882] [  675   720    51  2214   855] [  680   105  1474   529 10944]][[ 0.          0.01612903  2.          0.21428571  2.        ] [ 0.08928571  0.07594937  3.5         8.          0.2       ] [ 2.5         0.11956522  2.4         0.23636364  0.22222222] [ 0.33333333  0.35555556  5.66666667  0.14634146  0.42222222] [ 0.58823529  4.2         0.32835821  1.          0.05263158]][[  0   1   4   9  16] [ 25  36  49  64  81] [100 121 144 169 196] [225 256 289 324 361] [400 441 484 529 576]][[ True  True False  True False] [ True  True False False  True] [False  True False  True  True] [ True  True False  True  True] [ True False  True False  True]][[False False  True False  True] [False False  True  True False] [ True False  True False False] [False False  True False False] [False  True False False False]][[  335   418   289   572  2130] [ 1080  1833   679  1652  5185] [ 1825  3248  1069  2732  8240] [ 2570  4663  1459  3812 11295] [ 3315  6078  1849  4892 14350]]

#数组中特定的操作# dot, sum, min, max, cumsuma4 = np.arange(10)print(a4.sum())print(a4.min())print(a4.max())print(a4.cumsum()) #cumsum()逐项相加之和

45
0
9
[ 0  1  3  6 10 15 21 28 36 45]

print(a4)

[0 1 2 3 4 5 6 7 8 9]

#高级索引#奇特的索引# Fancy indexinga5 = np.arange(0, 100, 10)indices = [1, 5, -1]b5 = a5[indices]print(a5)print(b5)

[ 0 10 20 30 40 50 60 70 80 90][10 50 90]

#布尔屏蔽#布尔屏蔽是一个奇妙的特性，它允许我们按照我们指定的条件来检索元素。# Boolean maskingimport matplotlib.pyplot as plta6 = np.linspace(0, 2*np.pi, 50)b6 = np.sin(a6)plt.plot(a6,b6)mask = b6 >=0plt.plot(a6[mask], b6[mask], 'bo')mask = (b6 >=0) & (a6 <= np.pi/2)plt.plot(a6[mask], b6[mask], 'go')plt.show()

这里写图片描述

#不完整的索引#Incomplete Indexinga7 = np.arange(0,100,10)b7 = a7[:5]c7 = a7[a7 >=50]print(b7)print(c7)

[ 0 10 20 30 40][50 60 70 80 90]

#Where,当需要以特定条件来检索数组元素的时候。
#只需要传递给它一个条件，它将返回符合条件的元素的下标（一个由索引数组组成的元组），而不是元素本身。
# where
a8 = np.arange(0,100,10)
b8 = np.where(a8<50)
c8 = np.where(a8>=50)[0]
print(b8)
print(c8)

(array([0, 1, 2, 3, 4]),)[5 6 7 8 9]

numpy 辨异函数

np.repeat 与 np.tile 二者执行的均是复制操作；np.repeat：复制的是多维数组的每一个元素；np.tile：复制的是多维数组本身；

#np.repeat>> x = np.arange(1, 5).reshape(2, 2)>> np.repeat(x, 2)array([1, 1, 2, 2, 3, 3, 4, 4])        # 对数组中的每一个元素进行复制        # 除了待重复的数组之外，只有一个额外的参数时，高维数组也会 flatten 至一维

#在行的方向上（axis=1），在列的方向上（axis=0）>> np.repeat(x, 3, axis=1)array([[1, 1, 1, 2, 2, 2],       [3, 3, 3, 4, 4, 4]])>> np.repeat(x, 3, axis=0)array([[1, 2],       [1, 2],       [1, 2],       [3, 4],       [3, 4],       [3, 4]])

#numpy 下的 np.tile有些类似于 matlab 中的 repmat函数。不需要 axis 关键字参数，仅通过第二个参数便可指定在各个轴上的复制倍数。>> a = np.arange(3)>> np.tile(a, 2)array([0, 1, 2, 0, 1, 2])>> np.tile(a, (2, 2))array([[0, 1, 2, 0, 1, 2],       [0, 1, 2, 0, 1, 2]])>> b = np.arange(1, 5).reshape(2, 2)>> np.tile(b, 2)array([[1, 2, 1, 2],       [3, 4, 3, 4]])# 等价于>> np.tile(b, (1, 2))

References

Python科学计算之NumPy

An Introduction to Scientific Python – NumPy

机器学习入门必备的13张小抄

numpy教程 pandas教程 Python数据科学计算简介

0 0