Annotated ABCNN Code


Source:
https://github.com/galsang/ABCNN/edit/master/ABCNN.py

#!/usr/bin/env python
# encoding: utf-8
import tensorflow as tf
import numpy as np


class ABCNN():
    def __init__(self, s, w, l2_reg, model_type, num_features, d0=300, di=50, num_classes=2, num_layers=2):
        """
        Implementation of ABCNNs
        (https://arxiv.org/pdf/1512.05193.pdf)
        :param s: sentence length
        :param w: filter width
        :param l2_reg: L2 regularization coefficient
        :param model_type: type of the network (BCNN, ABCNN1, ABCNN2, ABCNN3)
        :param num_features: the number of pre-set features (not coming from the CNN) used in the output layer
        :param d0: dimensionality of the word embeddings (default: 300)
        :param di: the number of convolution kernels (default: 50)
        :param num_classes: the number of answer classes
        :param num_layers: the number of convolution layers
        """
        # Tensor of shape [batch_size, d0, s]; s is the sentence length (all sentences are
        # padded to the same length) and d0 is the embedding dimension.
        self.x1 = tf.placeholder(tf.float32, shape=[None, d0, s], name="x1")
        # The second sentence of the pair, also padded to length s: [batch_size, d0, s].
        self.x2 = tf.placeholder(tf.float32, shape=[None, d0, s], name="x2")
        # Labels, shape [batch_size].
        self.y = tf.placeholder(tf.int32, shape=[None], name="y")
        # Extra pre-computed features, shape [batch_size, num_features].
        self.features = tf.placeholder(tf.float32, shape=[None, num_features], name="features")

        # zero padding to inputs for wide convolution
        # tf.pad() pads a tensor; the paddings argument gives, for each dimension, how many
        # rows/columns to add before and after it, and its rank must match the rank of the input tensor.
        def pad_for_wide_conv(x):
            return tf.pad(x, np.array([[0, 0], [0, 0], [w - 1, w - 1], [0, 0]]), "CONSTANT", name="pad_wide_conv")

        def cos_sim(v1, v2):
            norm1 = tf.sqrt(tf.reduce_sum(tf.square(v1), axis=1))  # L2 norm along axis 1
            norm2 = tf.sqrt(tf.reduce_sum(tf.square(v2), axis=1))
            dot_products = tf.reduce_sum(v1 * v2, axis=1, name="cos_sim")

            return dot_products / (norm1 * norm2)

        def euclidean_score(v1, v2):
            euclidean = tf.sqrt(tf.reduce_sum(tf.square(v1 - v2), axis=1))
            return 1 / (1 + euclidean)

        def make_attention_mat(x1, x2):
            # x1, x2 = [batch, height, width, 1] = [batch, d, s, 1]
            # x2 => [batch, height, 1, width]
            # [batch, width, width] = [batch, s, s]
            euclidean = tf.sqrt(tf.reduce_sum(tf.square(x1 - tf.matrix_transpose(x2)), axis=1))
            return 1 / (1 + euclidean)

        def convolution(name_scope, x, d, reuse):
            with tf.name_scope(name_scope + "-conv"):
                with tf.variable_scope("conv") as scope:
                    conv = tf.contrib.layers.conv2d(
                        inputs=x,
                        num_outputs=di,
                        kernel_size=(d, w),
                        stride=1,
                        padding="VALID",
                        activation_fn=tf.nn.tanh,
                        weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
                        weights_regularizer=tf.contrib.layers.l2_regularizer(scale=l2_reg),
                        biases_initializer=tf.constant_initializer(1e-04),
                        reuse=reuse,
                        trainable=True,
                        scope=scope
                    )
                    # Weight: [filter_height, filter_width, in_channels, out_channels]
                    # output: [batch, 1, input_width + filter_width - 1, out_channels] == [batch, 1, s+w-1, di]
                    # transposed to [batch, di, s+w-1, 1]
                    conv_trans = tf.transpose(conv, [0, 3, 2, 1], name="conv_trans")
                    return conv_trans

        def w_pool(variable_scope, x, attention):
            # x: [batch, di, s+w-1, 1]
            # attention: [batch, s+w-1]
            with tf.variable_scope(variable_scope + "-w_pool"):
                if model_type == "ABCNN2" or model_type == "ABCNN3":
                    pools = []
                    # [batch, s+w-1] => [batch, 1, s+w-1, 1]
                    attention = tf.transpose(tf.expand_dims(tf.expand_dims(attention, -1), -1), [0, 2, 1, 3])

                    for i in range(s):
                        # [batch, di, w, 1], [batch, 1, w, 1] => [batch, di, 1, 1]
                        pools.append(tf.reduce_sum(x[:, :, i:i + w, :] * attention[:, :, i:i + w, :],
                                                   axis=2,
                                                   keep_dims=True))
                    # [batch, di, s, 1]
                    w_ap = tf.concat(pools, axis=2, name="w_ap")
                else:
                    w_ap = tf.layers.average_pooling2d(
                        inputs=x,
                        # (pool_height, pool_width)
                        pool_size=(1, w),
                        strides=1,
                        padding="VALID",
                        name="w_ap"
                    )
                    # [batch, di, s, 1]

                return w_ap

        def all_pool(variable_scope, x):
            with tf.variable_scope(variable_scope + "-all_pool"):
                if variable_scope.startswith("input"):
                    pool_width = s
                    d = d0
                else:
                    pool_width = s + w - 1
                    d = di

                all_ap = tf.layers.average_pooling2d(
                    inputs=x,
                    # (pool_height, pool_width)
                    pool_size=(1, pool_width),
                    strides=1,
                    padding="VALID",
                    name="all_ap"
                )
                # [batch, di, 1, 1]
                # [batch, di]
                all_ap_reshaped = tf.reshape(all_ap, [-1, d])
                # all_ap_reshaped = tf.squeeze(all_ap, [2, 3])

                return all_ap_reshaped

        def CNN_layer(variable_scope, x1, x2, d):
            # x1, x2 = [batch, d, s, 1]
            with tf.variable_scope(variable_scope):
                if model_type == "ABCNN1" or model_type == "ABCNN3":
                    with tf.name_scope("att_mat"):
                        aW = tf.get_variable(name="aW",
                                             shape=(s, d),
                                             initializer=tf.contrib.layers.xavier_initializer(),
                                             regularizer=tf.contrib.layers.l2_regularizer(scale=l2_reg))

                        # [batch, s, s]
                        att_mat = make_attention_mat(x1, x2)

                        # [batch, s, s] * [s, d] => [batch, s, d]
                        # matrix transpose => [batch, d, s]
                        # expand dims => [batch, d, s, 1]
                        x1_a = tf.expand_dims(tf.matrix_transpose(tf.einsum("ijk,kl->ijl", att_mat, aW)), -1)
                        x2_a = tf.expand_dims(tf.matrix_transpose(
                            tf.einsum("ijk,kl->ijl", tf.matrix_transpose(att_mat), aW)), -1)

                        # [batch, d, s, 2]
                        x1 = tf.concat([x1, x1_a], axis=3)
                        x2 = tf.concat([x2, x2_a], axis=3)

                left_conv = convolution(name_scope="left", x=pad_for_wide_conv(x1), d=d, reuse=False)
                right_conv = convolution(name_scope="right", x=pad_for_wide_conv(x2), d=d, reuse=True)

                left_attention, right_attention = None, None

                if model_type == "ABCNN2" or model_type == "ABCNN3":
                    # [batch, s+w-1, s+w-1]
                    att_mat = make_attention_mat(left_conv, right_conv)
                    # [batch, s+w-1], [batch, s+w-1]
                    left_attention, right_attention = tf.reduce_sum(att_mat, axis=2), tf.reduce_sum(att_mat, axis=1)

                left_wp = w_pool(variable_scope="left", x=left_conv, attention=left_attention)
                left_ap = all_pool(variable_scope="left", x=left_conv)
                right_wp = w_pool(variable_scope="right", x=right_conv, attention=right_attention)
                right_ap = all_pool(variable_scope="right", x=right_conv)

                return left_wp, left_ap, right_wp, right_ap

        x1_expanded = tf.expand_dims(self.x1, -1)
        x2_expanded = tf.expand_dims(self.x2, -1)

        # All-pooled representations of the raw word embeddings (layer 0).
        LO_0 = all_pool(variable_scope="input-left", x=x1_expanded)
        RO_0 = all_pool(variable_scope="input-right", x=x2_expanded)

        # First convolution block: LI_1/RI_1 are the w-pooled feature maps fed to the next layer,
        # LO_1/RO_1 are the all-pooled sentence representations.
        LI_1, LO_1, RI_1, RO_1 = CNN_layer(variable_scope="CNN-1", x1=x1_expanded, x2=x2_expanded, d=d0)
        sims = [cos_sim(LO_0, RO_0), cos_sim(LO_1, RO_1)]

        if num_layers > 1:
            _, LO_2, _, RO_2 = CNN_layer(variable_scope="CNN-2", x1=LI_1, x2=RI_1, d=di)
            self.test = LO_2
            self.test2 = RO_2
            sims.append(cos_sim(LO_2, RO_2))

        with tf.variable_scope("output-layer"):
            # Concatenate the pre-set features with the per-layer cosine similarities.
            self.output_features = tf.concat([self.features, tf.stack(sims, axis=1)], axis=1, name="output_features")

            self.estimation = tf.contrib.layers.fully_connected(
                inputs=self.output_features,
                num_outputs=num_classes,
                activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer(),
                weights_regularizer=tf.contrib.layers.l2_regularizer(scale=l2_reg),
                biases_initializer=tf.constant_initializer(1e-04),
                scope="FC"
            )

        self.prediction = tf.contrib.layers.softmax(self.estimation)[:, 1]

        self.cost = tf.add(
            tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.estimation, labels=self.y)),
            tf.reduce_sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)),
            name="cost")

        tf.summary.scalar("cost", self.cost)
        self.merged = tf.summary.merge_all()

        print("=" * 50)
        print("List of Variables:")
        for v in tf.trainable_variables():
            print(v.name)
        print("=" * 50)
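For reference, here is a minimal usage sketch of the class above, separate from the original file. It assumes TensorFlow 1.x; the hyper-parameter values, the Adagrad optimizer, and the random dummy batch are illustrative choices only and are not taken from the repository.

# Usage sketch (assumption: TensorFlow 1.x; all values below are illustrative).
import numpy as np
import tensorflow as tf

s, w, d0, num_features = 40, 4, 300, 4  # hypothetical sentence length, filter width, embedding dim, extra features
model = ABCNN(s=s, w=w, l2_reg=0.0004, model_type="ABCNN3",
              num_features=num_features, d0=d0, di=50,
              num_classes=2, num_layers=2)

# Any TF1 optimizer works here; Adagrad is just an example choice.
train_op = tf.train.AdagradOptimizer(0.08).minimize(model.cost)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Dummy batch: pre-embedded sentence pairs of shape [batch, d0, s], labels, extra features.
    batch_x1 = np.random.randn(8, d0, s).astype(np.float32)
    batch_x2 = np.random.randn(8, d0, s).astype(np.float32)
    batch_y = np.random.randint(0, 2, size=8)
    batch_f = np.random.randn(8, num_features).astype(np.float32)
    _, cost = sess.run([train_op, model.cost],
                       feed_dict={model.x1: batch_x1, model.x2: batch_x2,
                                  model.y: batch_y, model.features: batch_f})
    print("batch cost:", cost)

Note that the model expects sentences already mapped to fixed-length embedding matrices of shape [d0, s]; tokenization and embedding lookup happen outside this class.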