CNN 卷积神经网络-- 残差计算
来源:互联网 发布:生化危机7ps4淘宝 编辑:程序博客网 时间:2024/04/30 00:30
前言
本文主要是解析论文Notes on Convolutional Neural Networks的公式,参考了http://blog.csdn.net/lu597203933/article/details/46575871的公式推导,借用https://github.com/BigPeng/JavaCNN代码
CNN
cnn每一层会输出多个feature map, 每个feature map由多个神经元组成,假如某个feature map的shape是m*n, 则该feature map有m*n个神经元
卷积层
卷积计算
设当前层l为卷积层,下一层l+1为子采样层subsampling.
则卷积层l的输出feature map为:
残差计算
设当前层l为卷积层,下一层l+1为子采样层subsampling.
第l层的第j个feature map的残差公式为:
其中
其导数
为了之后的推导,先提前讲讲subsample过程,比较简单,假设采样层是对卷积层的均值处理,如卷积层的输出feature map(
则经过subsample的结果是:
subsample过程如下:
import java.util.Arrays;

/**
 * Demonstrates mean-pooling (subsampling) of a convolutional feature map.
 * Created by keliz on 7/7/16.
 */
public class test {
    /**
     * Size of a convolution kernel or pooling scale; width and height may differ.
     */
    public static class Size {
        public final int x;
        public final int y;

        public Size(int x, int y) {
            this.x = x;
            this.y = y;
        }
    }

    /**
     * Shrinks a matrix by averaging each scale.x-by-scale.y tile (mean pooling).
     *
     * @param matrix input feature map; its dimensions must be exact multiples of the scale
     * @param scale  pooling window size
     * @return pooled matrix of size (m / scale.x) x (n / scale.y)
     * @throws RuntimeException if the scale does not evenly divide the matrix dimensions
     */
    public static double[][] scaleMatrix(final double[][] matrix, final Size scale) {
        final int m = matrix.length;
        final int n = matrix[0].length;
        final int sm = m / scale.x;
        final int sn = n / scale.y;
        // Fail fast on bad input BEFORE allocating the result array
        // (the original allocated first, then validated).
        if (sm * scale.x != m || sn * scale.y != n)
            throw new RuntimeException("scale不能整除matrix");
        final double[][] outMatrix = new double[sm][sn];
        final int size = scale.x * scale.y;
        for (int i = 0; i < sm; i++) {
            for (int j = 0; j < sn; j++) {
                double sum = 0.0;
                // Sum one scale.x-by-scale.y tile, then average it.
                for (int si = i * scale.x; si < (i + 1) * scale.x; si++) {
                    for (int sj = j * scale.y; sj < (j + 1) * scale.y; sj++) {
                        sum += matrix[si][sj];
                    }
                }
                outMatrix[i][j] = sum / size;
            }
        }
        return outMatrix;
    }

    public static void main(String[] args) {
        int row = 4;
        int column = 4;
        int k = 0;
        double[][] matrix = new double[row][column];
        Size s = new Size(2, 2);
        // Fill the demo feature map with 1..16 in row-major order.
        for (int i = 0; i < row; ++i)
            for (int j = 0; j < column; ++j)
                matrix[i][j] = ++k;
        double[][] result = scaleMatrix(matrix, s);
        System.out.println(Arrays.deepToString(matrix).replaceAll("],", "]," + System.getProperty("line.separator")));
        System.out.println(Arrays.deepToString(result).replaceAll("],", "]," + System.getProperty("line.separator")));
    }
}
其中3.5=(1+2+5+6)/(2*2); 5.5=(3+4+7+8)/(2*2)
由此可知,卷积层输出的feature map中的值为1的节点,值为2的节点,值为5的节点,值为6的节点(神经元)与subsample层的值为3.5的节点相连接,值为3,值为4,值为7,值为8节点与subsample层的值为5.5节点相连接。由BP算法章节的推导结论可知
卷积层第j个节点的残差等于子采样层与其相连接的所有节点的权值乘以相应的残差的加权和再乘以该节点的导数
对着公式看比较容易理解这句话。
假设子采样层的对应文中的卷积层的残差
按照公式(1),节点1值为0.5的残差是
因为这是计算单个神经元的残差,所以需要把
即
同理,对于节点2,
残差为
对于节点5,
残差为
对于节点6,
残差为
因为节点3对应的子采样层的残差是0.6,所以节点3的残差为
即
公式(1)使用了一个技巧,实现这个计算过程,把子采样层的残差
即
与
做克罗内克积。
结果是
计算过程:
import java.util.Arrays;

/**
 * Expands a matrix by replicating each entry over a scale.x-by-scale.y tile
 * (equivalent to the Kronecker product with an all-ones matrix).
 * Created by keliz on 7/7/16.
 */
public class kronecker {
    /**
     * Size of a convolution kernel or pooling scale; width and height may differ.
     */
    public static class Size {
        public final int x;
        public final int y;

        public Size(int x, int y) {
            this.x = x;
            this.y = y;
        }
    }

    /**
     * Kronecker expansion: every input cell is copied into a scale.x-by-scale.y
     * block of the output.
     *
     * @param matrix input matrix
     * @param scale  expansion factor per axis
     * @return expanded matrix of size (m * scale.x) x (n * scale.y)
     */
    public static double[][] kronecker(final double[][] matrix, final Size scale) {
        final int rows = matrix.length;
        final int cols = matrix[0].length;
        final double[][] expanded = new double[rows * scale.x][cols * scale.y];
        // Walk the output grid; each output cell maps back to exactly one source cell.
        for (int r = 0; r < expanded.length; r++) {
            for (int c = 0; c < expanded[r].length; c++) {
                expanded[r][c] = matrix[r / scale.x][c / scale.y];
            }
        }
        return expanded;
    }

    public static void main(String[] args) {
        int row = 2;
        int column = 2;
        double k = 0.5;
        double[][] matrix = new double[row][column];
        Size s = new Size(2, 2);
        // Fill the demo delta map with 0.5, 0.6, 0.7, 0.8.
        for (int i = 0; i < row; ++i)
            for (int j = 0; j < column; ++j) {
                matrix[i][j] = k;
                k += 0.1;
            }
        System.out.println(Arrays.deepToString(matrix).replaceAll("],", "]," + System.getProperty("line.separator")));
        double[][] result = kronecker(matrix, s);
        System.out.println(Arrays.deepToString(result).replaceAll("],", "]," + System.getProperty("line.separator")));
    }
}
将
子采样层
采样计算
假设采样层是对卷积层的均值处理,如卷积层的输出feature map(
则经过subsample的结果是:
公式为:
卷积与子采样层的计算公式都包含
subsample过程如下:
import java.util.Arrays;

/**
 * Demonstrates mean-pooling (subsampling) of a convolutional feature map.
 * Created by keliz on 7/7/16.
 */
public class test {
    /**
     * Size of a convolution kernel or pooling scale; width and height may differ.
     */
    public static class Size {
        public final int x;
        public final int y;

        public Size(int x, int y) {
            this.x = x;
            this.y = y;
        }
    }

    /**
     * Shrinks a matrix by averaging each scale.x-by-scale.y tile (mean pooling).
     *
     * @param matrix input feature map; dimensions must be exact multiples of the scale
     * @param scale  pooling window size
     * @return pooled matrix of size (m / scale.x) x (n / scale.y)
     */
    public static double[][] scaleMatrix(final double[][] matrix, final Size scale) {
        final int rows = matrix.length;
        final int cols = matrix[0].length;
        final int pooledRows = rows / scale.x;
        final int pooledCols = cols / scale.y;
        final double[][] pooled = new double[pooledRows][pooledCols];
        if (pooledRows * scale.x != rows || pooledCols * scale.y != cols)
            throw new RuntimeException("scale不能整除matrix");
        final int windowSize = scale.x * scale.y;
        // Single pass over the source: accumulate each cell into its pooling bin.
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                pooled[i / scale.x][j / scale.y] += matrix[i][j];
            }
        }
        // Turn the tile sums into means.
        for (int i = 0; i < pooledRows; i++) {
            for (int j = 0; j < pooledCols; j++) {
                pooled[i][j] /= windowSize;
            }
        }
        return pooled;
    }

    public static void main(String[] args) {
        final int rows = 4;
        final int cols = 4;
        int value = 0;
        final double[][] featureMap = new double[rows][cols];
        final Size window = new Size(2, 2);
        // Fill the demo feature map with 1..16 in row-major order.
        for (int i = 0; i < rows; ++i)
            for (int j = 0; j < cols; ++j)
                featureMap[i][j] = ++value;
        final double[][] pooled = scaleMatrix(featureMap, window);
        System.out.println(Arrays.deepToString(featureMap).replaceAll("],", "]," + System.getProperty("line.separator")));
        System.out.println(Arrays.deepToString(pooled).replaceAll("],", "]," + System.getProperty("line.separator")));
    }
}
其中3.5=(1+2+5+6)/(2*2); 5.5=(3+4+7+8)/(2*2)
残差计算
设当前层l为子采样层,下一层l+1为卷积层.
第l层的第j个feature map的残差公式为:
设子采样层的输出feature map(
对应的卷积层的kernel(
则卷积层输出的feature map为
假设卷积层的delta(
delta与feature map是一一对应
计算过程:
import java.util.Arrays;

/**
 * Demonstrates valid-mode 2-D convolution (MATLAB conv2 'valid' semantics:
 * the kernel is rotated 180 degrees before the sliding dot product).
 * Created by keliz on 7/7/16.
 */
public class conv {
    /**
     * Returns a deep copy of a rectangular matrix; the input is left untouched.
     *
     * @param matrix source matrix
     * @return independent copy
     */
    public static double[][] cloneMatrix(final double[][] matrix) {
        final int rows = matrix.length;
        final int cols = matrix[0].length;
        final double[][] copy = new double[rows][cols];
        for (int i = 0; i < rows; i++) {
            System.arraycopy(matrix[i], 0, copy[i], 0, cols);
        }
        return copy;
    }

    /**
     * Returns the matrix rotated by 180 degrees; the input matrix is not modified.
     *
     * @param matrix source matrix
     * @return rotated copy
     */
    public static double[][] rot180(double[][] matrix) {
        final int rows = matrix.length;
        final int cols = matrix[0].length;
        final double[][] rotated = new double[rows][cols];
        // A 180-degree rotation maps (i, j) to (rows-1-i, cols-1-j).
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                rotated[i][j] = matrix[rows - 1 - i][cols - 1 - j];
            }
        }
        return rotated;
    }

    /**
     * Valid-mode convolution: the kernel slides only over positions where it
     * fits entirely inside the matrix.
     *
     * @param matrix input matrix
     * @param kernel convolution kernel (rotated 180 degrees internally, per MATLAB conv semantics)
     * @return result of size (m - km + 1) x (n - kn + 1)
     */
    public static double[][] convnValid(final double[][] matrix, double[][] kernel) {
        // MATLAB conv semantics: rotate the kernel 180 degrees first.
        kernel = rot180(kernel);
        final int rows = matrix.length;
        final int cols = matrix[0].length;
        final int kRows = kernel.length;
        final int kCols = kernel[0].length;
        final int outRows = rows - kRows + 1;
        final int outCols = cols - kCols + 1;
        final double[][] out = new double[outRows][outCols];
        for (int i = 0; i < outRows; i++) {
            for (int j = 0; j < outCols; j++) {
                double acc = 0.0;
                for (int ki = 0; ki < kRows; ki++) {
                    for (int kj = 0; kj < kCols; kj++) {
                        acc += matrix[i + ki][j + kj] * kernel[ki][kj];
                    }
                }
                out[i][j] = acc;
            }
        }
        return out;
    }

    public static void main(String[] args) {
        final int mapRows = 4;
        final int mapCols = 4;
        int mapValue = 0;
        final double[][] subSampleLayer = new double[mapRows][mapCols];
        // Subsampling-layer output filled with 1..16 in row-major order.
        for (int i = 0; i < mapRows; ++i)
            for (int j = 0; j < mapCols; ++j)
                subSampleLayer[i][j] = ++mapValue;
        final int kRows = 2;
        final int kCols = 2;
        double kValue = 0.3;
        final double[][] kernelMatrix = new double[kRows][kCols];
        // 2x2 kernel filled with 0.3, 0.4, 0.5, 0.6.
        for (int i = 0; i < kRows; ++i)
            for (int j = 0; j < kCols; ++j) {
                kernelMatrix[i][j] = kValue;
                kValue += 0.1;
            }
        System.out.println(Arrays.deepToString(kernelMatrix).replaceAll("],", "]," + System.getProperty("line.separator")));
        final double[][] result = convnValid(subSampleLayer, kernelMatrix);
        System.out.println(Arrays.deepToString(result).replaceAll("],", "]," + System.getProperty("line.separator")));
    }
}
注意:卷积计算时,需要对kernel先旋转180度
从计算过程可以看出,卷积层输出的feature map中的神经元(0,0)值为7.2是由子采样层的输出feature map中的(0,0)值为1,(0,1)值为2,(1,0)值为5,(1,1)值为6生成,即子采样层的输出feature map中的(0,0)与卷积层
再如,卷积层输出的feature map中的神经元(0,1)值为9.0是由子采样层的输出feature map中的(0,1)值为2,(0,2)值为3,(1,1)值为6,(1,2)值为7生成,即子采样层的输出feature map中的(0,1)与卷积层
再如,卷积层输出的feature map中的神经元(1,0)值为14.4是由子采样层的输出feature map中的(1,0)值为5,(1,1)值为6,(2,0)值为9,(2,1)值为10生成。
卷积层输出的feature map中的神经元(1,1)值为16.2是由子采样层的输出feature map中的(1,1)值为6,(1,2)值为7,(2,1)值为10,(2,2)值为11生成.
反过来,从这几部分可以看出,子采样层的输出feature map中的(1,1)值为6, 是与卷积层
由前一章节的BP算法推导结论可知
子采样层第j个节点的残差等于卷积层与其相连接的所有节点的权值乘以相应的残差的加权和再乘以该子采样层节点的导数。
由于子采样层节点(0,0)值为1只与卷积层
即
子采样层节点(0,1)值为2与卷积层
因为卷积层
子采样层节点(1,1)值为6与卷积层
以此类推可以得出子采样层的残差:
这个残差计算过程使用公式(5)描述,这其中跟卷积层的残差计算一样,有一个小技巧,用卷积层的残差矩阵与旋转180度后的卷积层kernel做full模式的卷积,因为在MATLAB中,计算卷积时,需要旋转180度后再进行卷积,而这个残差计算过程是不需要旋转的,因此需要事先把它旋转180度。看看下面这个过程,就知道为什么不需要旋转180度。
设
conv的full模式需要把
即卷积层的delta(
扩展为
此时用旋转180度的kernel,与扩展后的
,
import java.util.Arrays;

/**
 * Demonstrates full-mode 2-D convolution (MATLAB conv2 'full' semantics):
 * the input is zero-padded so every overlap of kernel and matrix contributes.
 * Created by keliz on 7/7/16.
 */
public class convFull {
    /**
     * Full-mode convolution: zero-pads the matrix by (kernel size - 1) on every
     * side and then runs a valid-mode convolution over the padded matrix.
     *
     * @param matrix input matrix
     * @param kernel convolution kernel
     * @return result of size (m + km - 1) x (n + kn - 1)
     */
    public static double[][] convnFull(double[][] matrix, final double[][] kernel) {
        final int rows = matrix.length;
        final int cols = matrix[0].length;
        final int kRows = kernel.length;
        final int kCols = kernel[0].length;
        // Zero-pad by (kernel dimension - 1) on each side.
        final double[][] padded = new double[rows + 2 * (kRows - 1)][cols + 2 * (kCols - 1)];
        for (int i = 0; i < rows; i++) {
            System.arraycopy(matrix[i], 0, padded[i + kRows - 1], kCols - 1, cols);
        }
        return convnValid(padded, kernel);
    }

    /**
     * Valid-mode convolution: the kernel slides only over positions where it
     * fits entirely inside the matrix.
     *
     * @param matrix input matrix
     * @param kernel convolution kernel (rotated 180 degrees internally, per MATLAB conv semantics)
     * @return result of size (m - km + 1) x (n - kn + 1)
     */
    public static double[][] convnValid(final double[][] matrix, double[][] kernel) {
        // MATLAB conv semantics: rotate the kernel 180 degrees first.
        kernel = rot180(kernel);
        final int rows = matrix.length;
        final int cols = matrix[0].length;
        final int kRows = kernel.length;
        final int kCols = kernel[0].length;
        final int outRows = rows - kRows + 1;
        final int outCols = cols - kCols + 1;
        final double[][] out = new double[outRows][outCols];
        for (int i = 0; i < outRows; i++) {
            for (int j = 0; j < outCols; j++) {
                double acc = 0.0;
                for (int ki = 0; ki < kRows; ki++) {
                    for (int kj = 0; kj < kCols; kj++) {
                        acc += matrix[i + ki][j + kj] * kernel[ki][kj];
                    }
                }
                out[i][j] = acc;
            }
        }
        return out;
    }

    /**
     * Returns a deep copy of a rectangular matrix; the input is left untouched.
     *
     * @param matrix source matrix
     * @return independent copy
     */
    public static double[][] cloneMatrix(final double[][] matrix) {
        final int rows = matrix.length;
        final int cols = matrix[0].length;
        final double[][] copy = new double[rows][cols];
        for (int i = 0; i < rows; i++) {
            System.arraycopy(matrix[i], 0, copy[i], 0, cols);
        }
        return copy;
    }

    /**
     * Returns the matrix rotated by 180 degrees; the input matrix is not modified.
     *
     * @param matrix source matrix
     * @return rotated copy
     */
    public static double[][] rot180(double[][] matrix) {
        final int rows = matrix.length;
        final int cols = matrix[0].length;
        final double[][] rotated = new double[rows][cols];
        // A 180-degree rotation maps (i, j) to (rows-1-i, cols-1-j).
        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                rotated[i][j] = matrix[rows - 1 - i][cols - 1 - j];
            }
        }
        return rotated;
    }

    public static void main(String[] args) {
        final int deltaRows = 3;
        final int deltaCols = 3;
        double deltaValue = 0.1;
        final double[][] delta = new double[deltaRows][deltaCols];
        // 3x3 delta map filled with 0.1 .. 0.9.
        for (int i = 0; i < deltaRows; ++i)
            for (int j = 0; j < deltaCols; ++j) {
                delta[i][j] = deltaValue;
                deltaValue += 0.1;
            }
        final int kRows = 2;
        final int kCols = 2;
        double kValue = 0.3;
        final double[][] kernel = new double[kRows][kCols];
        // 2x2 kernel filled with 0.3, 0.4, 0.5, 0.6.
        for (int i = 0; i < kRows; ++i)
            for (int j = 0; j < kCols; ++j) {
                kernel[i][j] = kValue;
                kValue += 0.1;
            }
        final double[][] result = convnFull(delta, kernel);
        System.out.println(Arrays.deepToString(result).replaceAll("],", "]," + System.getProperty("line.separator")));
    }
}
参考文献
JavaCNN
CNN公式推导
Notes on Convolutional Neural Networks
Notes on Convolutional Neural Networks论文翻译
- CNN 卷积神经网络-- 残差计算
- 卷积神经网络残差计算
- CNN卷积神经网络内存占用计算
- Deep Learning-TensorFlow (14) CNN卷积神经网络_深度残差网络 ResNet
- CNN 卷积神经网络 二维卷积
- 卷积神经网络CNN
- 看懂卷积神经网络(CNN)
- 卷积神经网络(CNN)
- 卷积神经网络(CNN)
- [CNN]卷积神经网络
- CNN卷积神经网络
- 卷积神经网络CNN
- 读懂卷积神经网络CNN
- CNN卷积神经网络
- 卷积神经网络CNN
- 卷积神经网络CNN
- CNN卷积神经网络
- 卷积神经网络CNN
- *.jar 与 *.aar 的生成与*.aar导入项目方法
- docker镜像相关命令
- Linux快速修改用户密码
- ios .h声明变量在@interface括号里和外面
- 理解Python中的with语句
- CNN 卷积神经网络-- 残差计算
- Wamp下的Apache无法启动的解决方法
- ListView的下拉刷新上拉加载以及带列的横向滚动
- ssl 申请流程
- 添加Library 和添加 Include
- java ant编译
- 不使用库函数,编写函数int strcmp(char *source, char *dest) 相等返回0,不等返回-1;
- leetcode Single Number I II III
- gradle 各类问题汇总