ABCNN Code Annotations
Source: https://github.com/galsang/ABCNN/edit/master/ABCNN.py
```python
#!/usr/bin/env python
# encoding: utf-8
import numpy as np
import tensorflow as tf


class ABCNN():
    def __init__(self, s, w, l2_reg, model_type, num_features,
                 d0=300, di=50, num_classes=2, num_layers=2):
        """
        Implementation of ABCNNs (https://arxiv.org/pdf/1512.05193.pdf)

        :param s: sentence length
        :param w: filter width
        :param l2_reg: L2 regularization coefficient
        :param model_type: type of the network (BCNN, ABCNN1, ABCNN2, ABCNN3)
        :param num_features: number of pre-set features (not coming from the CNN) used in the output layer
        :param d0: dimensionality of the word embeddings (default: 300)
        :param di: number of convolution kernels (default: 50)
        :param num_classes: number of answer classes
        :param num_layers: number of convolution layers
        """

        # Shape [batch_size, d0, s]: s is the sentence length (all sentences are padded
        # to the same length), d0 is the embedding dimension.
        self.x1 = tf.placeholder(tf.float32, shape=[None, d0, s], name="x1")
        # Shape [batch_size, d0, s]: the second sentence is also padded to the same length.
        self.x2 = tf.placeholder(tf.float32, shape=[None, d0, s], name="x2")
        # Labels, shape [batch_size].
        self.y = tf.placeholder(tf.int32, shape=[None], name="y")
        # Extra hand-crafted features, shape [batch_size, num_features].
        self.features = tf.placeholder(tf.float32, shape=[None, num_features], name="features")

        # Zero-pad the inputs for wide convolution.
        # tf.pad() pads a tensor; the paddings argument specifies how many rows/columns to add
        # before and after each dimension, and its rank must equal the rank of the padded tensor.
        def pad_for_wide_conv(x):
            return tf.pad(x, np.array([[0, 0], [0, 0], [w - 1, w - 1], [0, 0]]),
                          "CONSTANT", name="pad_wide_conv")

        def cos_sim(v1, v2):
            norm1 = tf.sqrt(tf.reduce_sum(tf.square(v1), axis=1))  # sum over axis 1
            norm2 = tf.sqrt(tf.reduce_sum(tf.square(v2), axis=1))
            dot_products = tf.reduce_sum(v1 * v2, axis=1, name="cos_sim")
            return dot_products / (norm1 * norm2)

        def euclidean_score(v1, v2):
            euclidean = tf.sqrt(tf.reduce_sum(tf.square(v1 - v2), axis=1))
            return 1 / (1 + euclidean)

        def make_attention_mat(x1, x2):
            # x1, x2 = [batch, height, width, 1] = [batch, d, s, 1]
            # x2 => [batch, height, 1, width]
            # result: [batch, width, width] = [batch, s, s]
            euclidean = tf.sqrt(tf.reduce_sum(tf.square(x1 - tf.matrix_transpose(x2)), axis=1))
            return 1 / (1 + euclidean)

        def convolution(name_scope, x, d, reuse):
            with tf.name_scope(name_scope + "-conv"):
                with tf.variable_scope("conv") as scope:
                    conv = tf.contrib.layers.conv2d(
                        inputs=x,
                        num_outputs=di,
                        kernel_size=(d, w),
                        stride=1,
                        padding="VALID",
                        activation_fn=tf.nn.tanh,
                        weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                        weights_regularizer=tf.contrib.layers.l2_regularizer(scale=l2_reg),
                        biases_initializer=tf.constant_initializer(1e-04),
                        reuse=reuse,
                        trainable=True,
                        scope=scope
                    )
                    # Weight: [filter_height, filter_width, in_channels, out_channels]
                    # Output: [batch, 1, input_width + filter_width - 1, out_channels] == [batch, 1, s+w-1, di]

                    # [batch, di, s+w-1, 1]
                    conv_trans = tf.transpose(conv, [0, 3, 2, 1], name="conv_trans")
                    return conv_trans

        def w_pool(variable_scope, x, attention):
            # x: [batch, di, s+w-1, 1]
            # attention: [batch, s+w-1]
            with tf.variable_scope(variable_scope + "-w_pool"):
                if model_type == "ABCNN2" or model_type == "ABCNN3":
                    pools = []
                    # [batch, s+w-1] => [batch, 1, s+w-1, 1]
                    attention = tf.transpose(tf.expand_dims(tf.expand_dims(attention, -1), -1),
                                             [0, 2, 1, 3])

                    for i in range(s):
                        # [batch, di, w, 1] * [batch, 1, w, 1] => [batch, di, 1, 1]
                        pools.append(tf.reduce_sum(x[:, :, i:i + w, :] * attention[:, :, i:i + w, :],
                                                   axis=2,
                                                   keep_dims=True))

                    # [batch, di, s, 1]
                    w_ap = tf.concat(pools, axis=2, name="w_ap")
                else:
                    w_ap = tf.layers.average_pooling2d(
                        inputs=x,
                        # (pool_height, pool_width)
                        pool_size=(1, w),
                        strides=1,
                        padding="VALID",
                        name="w_ap"
                    )
                    # [batch, di, s, 1]

                return w_ap

        def all_pool(variable_scope, x):
            with tf.variable_scope(variable_scope + "-all_pool"):
                if variable_scope.startswith("input"):
                    pool_width = s
                    d = d0
                else:
                    pool_width = s + w - 1
                    d = di

                all_ap = tf.layers.average_pooling2d(
                    inputs=x,
                    # (pool_height, pool_width)
                    pool_size=(1, pool_width),
                    strides=1,
                    padding="VALID",
                    name="all_ap"
                )
                # [batch, di, 1, 1]

                # [batch, di]
                all_ap_reshaped = tf.reshape(all_ap, [-1, d])
                # all_ap_reshaped = tf.squeeze(all_ap, [2, 3])

                return all_ap_reshaped

        def CNN_layer(variable_scope, x1, x2, d):
            # x1, x2 = [batch, d, s, 1]
            with tf.variable_scope(variable_scope):
                if model_type == "ABCNN1" or model_type == "ABCNN3":
                    with tf.name_scope("att_mat"):
                        aW = tf.get_variable(name="aW",
                                             shape=(s, d),
                                             initializer=tf.contrib.layers.xavier_initializer(),
                                             regularizer=tf.contrib.layers.l2_regularizer(scale=l2_reg))

                        # [batch, s, s]
                        att_mat = make_attention_mat(x1, x2)

                        # [batch, s, s] * [s, d] => [batch, s, d]
                        # matrix transpose => [batch, d, s]
                        # expand dims => [batch, d, s, 1]
                        x1_a = tf.expand_dims(tf.matrix_transpose(
                            tf.einsum("ijk,kl->ijl", att_mat, aW)), -1)
                        x2_a = tf.expand_dims(tf.matrix_transpose(
                            tf.einsum("ijk,kl->ijl", tf.matrix_transpose(att_mat), aW)), -1)

                        # [batch, d, s, 2]
                        x1 = tf.concat([x1, x1_a], axis=3)
                        x2 = tf.concat([x2, x2_a], axis=3)

                left_conv = convolution(name_scope="left", x=pad_for_wide_conv(x1), d=d, reuse=False)
                right_conv = convolution(name_scope="right", x=pad_for_wide_conv(x2), d=d, reuse=True)

                left_attention, right_attention = None, None

                if model_type == "ABCNN2" or model_type == "ABCNN3":
                    # [batch, s+w-1, s+w-1]
                    att_mat = make_attention_mat(left_conv, right_conv)
                    # [batch, s+w-1], [batch, s+w-1]
                    left_attention, right_attention = tf.reduce_sum(att_mat, axis=2), tf.reduce_sum(att_mat, axis=1)

                left_wp = w_pool(variable_scope="left", x=left_conv, attention=left_attention)
                left_ap = all_pool(variable_scope="left", x=left_conv)
                right_wp = w_pool(variable_scope="right", x=right_conv, attention=right_attention)
                right_ap = all_pool(variable_scope="right", x=right_conv)

                return left_wp, left_ap, right_wp, right_ap

        x1_expanded = tf.expand_dims(self.x1, -1)
        x2_expanded = tf.expand_dims(self.x2, -1)

        LO_0 = all_pool(variable_scope="input-left", x=x1_expanded)
        RO_0 = all_pool(variable_scope="input-right", x=x2_expanded)

        LI_1, LO_1, RI_1, RO_1 = CNN_layer(variable_scope="CNN-1", x1=x1_expanded, x2=x2_expanded, d=d0)
        sims = [cos_sim(LO_0, RO_0), cos_sim(LO_1, RO_1)]

        if num_layers > 1:
            _, LO_2, _, RO_2 = CNN_layer(variable_scope="CNN-2", x1=LI_1, x2=RI_1, d=di)
            self.test = LO_2
            self.test2 = RO_2
            sims.append(cos_sim(LO_2, RO_2))

        with tf.variable_scope("output-layer"):
            self.output_features = tf.concat([self.features, tf.stack(sims, axis=1)], axis=1,
                                             name="output_features")

            self.estimation = tf.contrib.layers.fully_connected(
                inputs=self.output_features,
                num_outputs=num_classes,
                activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer(),
                weights_regularizer=tf.contrib.layers.l2_regularizer(scale=l2_reg),
                biases_initializer=tf.constant_initializer(1e-04),
                scope="FC"
            )

        self.prediction = tf.contrib.layers.softmax(self.estimation)[:, 1]

        self.cost = tf.add(
            tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.estimation, labels=self.y)),
            tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)),
            name="cost")

        tf.summary.scalar("cost", self.cost)
        self.merged = tf.summary.merge_all()

        print("=" * 50)
        print("List of Variables:")
        for v in tf.trainable_variables():
            print(v.name)
        print("=" * 50)
```
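The class above only builds the computation graph; the repository drives it from separate training and test scripts. For orientation, here is a minimal, hypothetical sketch of how the graph could be instantiated and run for one training step. It assumes TensorFlow 1.x, random arrays in place of real word embeddings, and made-up hyperparameters (sentence length, filter width, optimizer); it is not the repository's actual training code.

```python
# Minimal sketch (not from the original post): build the graph and run one training step.
# Assumes TensorFlow 1.x; all sizes and hyperparameters below are illustrative only.
import numpy as np
import tensorflow as tf

from ABCNN import ABCNN  # assumes the class above is saved as ABCNN.py

s, d0, num_features, batch = 40, 300, 4, 8   # hypothetical sentence length, embedding dim, etc.

model = ABCNN(s=s, w=4, l2_reg=0.0004, model_type="ABCNN3",
              num_features=num_features, d0=d0, di=50,
              num_classes=2, num_layers=2)

# Any TF1 optimizer can minimize model.cost; Adagrad is used here purely as an example.
train_op = tf.train.AdagradOptimizer(0.08).minimize(model.cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {
        model.x1: np.random.randn(batch, d0, s).astype(np.float32),  # sentence 1: [batch, d0, s]
        model.x2: np.random.randn(batch, d0, s).astype(np.float32),  # sentence 2: [batch, d0, s]
        model.y: np.random.randint(0, 2, size=batch),                # binary labels: [batch]
        model.features: np.random.randn(batch, num_features).astype(np.float32),  # extra features
    }
    _, cost, pred = sess.run([train_op, model.cost, model.prediction], feed_dict=feed)
    print("cost:", cost, "P(class=1):", pred)
```

The only inputs the graph requires are the four placeholders defined in `__init__` (`x1`, `x2`, `y`, `features`); attention, pooling, and the output layer are wired up internally.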