[Distributed TensorFlow (0.11.0), unresolved] Segmentation fault (core dumped)


I have three tests. The main function is essentially the same in each; only the model differs. All three fail with Segmentation fault (core dumped).


In my previous issue write-up I tested with a dummy dataset, running only the forward pass with no parameter updates or optimization. So this time I rewrote the script to use a real dataset.


Training dataset:

960,831 images (224×224), already converted into 97 tfrecords files, listed below:

[root@dl1 train]# ls
train_224_0.tfrecords   train_224_32.tfrecords  train_224_55.tfrecords  train_224_78.tfrecords
train_224_10.tfrecords  train_224_33.tfrecords  train_224_56.tfrecords  train_224_79.tfrecords
train_224_11.tfrecords  train_224_34.tfrecords  train_224_57.tfrecords  train_224_7.tfrecords
train_224_12.tfrecords  train_224_35.tfrecords  train_224_58.tfrecords  train_224_80.tfrecords
train_224_13.tfrecords  train_224_36.tfrecords  train_224_59.tfrecords  train_224_81.tfrecords
train_224_14.tfrecords  train_224_37.tfrecords  train_224_5.tfrecords   train_224_82.tfrecords
train_224_15.tfrecords  train_224_38.tfrecords  train_224_60.tfrecords  train_224_83.tfrecords
train_224_16.tfrecords  train_224_39.tfrecords  train_224_61.tfrecords  train_224_84.tfrecords
train_224_17.tfrecords  train_224_3.tfrecords   train_224_62.tfrecords  train_224_85.tfrecords
train_224_18.tfrecords  train_224_40.tfrecords  train_224_63.tfrecords  train_224_86.tfrecords
train_224_19.tfrecords  train_224_41.tfrecords  train_224_64.tfrecords  train_224_87.tfrecords
train_224_1.tfrecords   train_224_42.tfrecords  train_224_65.tfrecords  train_224_88.tfrecords
train_224_20.tfrecords  train_224_43.tfrecords  train_224_66.tfrecords  train_224_89.tfrecords
train_224_21.tfrecords  train_224_44.tfrecords  train_224_67.tfrecords  train_224_8.tfrecords
train_224_22.tfrecords  train_224_45.tfrecords  train_224_68.tfrecords  train_224_90.tfrecords
train_224_23.tfrecords  train_224_46.tfrecords  train_224_69.tfrecords  train_224_91.tfrecords
train_224_24.tfrecords  train_224_47.tfrecords  train_224_6.tfrecords   train_224_92.tfrecords
train_224_25.tfrecords  train_224_48.tfrecords  train_224_70.tfrecords  train_224_93.tfrecords
train_224_26.tfrecords  train_224_49.tfrecords  train_224_71.tfrecords  train_224_94.tfrecords
train_224_27.tfrecords  train_224_4.tfrecords   train_224_72.tfrecords  train_224_95.tfrecords
train_224_28.tfrecords  train_224_50.tfrecords  train_224_73.tfrecords  train_224_96.tfrecords
train_224_29.tfrecords  train_224_51.tfrecords  train_224_74.tfrecords  train_224_9.tfrecords
train_224_2.tfrecords   train_224_52.tfrecords  train_224_75.tfrecords  train_224_image_mean.npy
train_224_30.tfrecords  train_224_53.tfrecords  train_224_76.tfrecords
train_224_31.tfrecords  train_224_54.tfrecords  train_224_77.tfrecords
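As a side check (my own sketch, not part of the original post; the directory path is an example), the shard contents can be counted with the record iterator to confirm the files themselves are readable:

# Count records across the shards; the total should match the image count.
import glob
import tensorflow as tf

total = 0
for path in sorted(glob.glob('/root/train/train_224_*.tfrecords')):  # example path
  total += sum(1 for _ in tf.python_io.tf_record_iterator(path))
print('total records:', total)  # expected: 960831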


Main function:

def main(_):
  ps_hosts = FLAGS.ps_hosts.split(",")
  worker_hosts = FLAGS.worker_hosts.split(",")
  cluster = tf.train.ClusterSpec({"ps": ps_hosts, "worker": worker_hosts})
  server = tf.train.Server(cluster,
                           job_name=FLAGS.job_name,
                           task_index=FLAGS.task_index)
  issync = FLAGS.issync
  if FLAGS.job_name == "ps":
    server.join()
  elif FLAGS.job_name == "worker":
    images, labels = ...
    with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:%d" % FLAGS.task_index,
        cluster=cluster)):
      global_step = tf.Variable(0, name='global_step', trainable=False)
      # Change this line to call a different model
      logits, parameters = inference(images)
      logits = tf.contrib.layers.flatten(logits)
      cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=labels, logits=logits, name='xentropy')
      loss_value = tf.reduce_mean(cross_entropy, name='xentropy_mean')
      optimizer = tf.train.GradientDescentOptimizer(learning_rate)
      grads_and_vars = optimizer.compute_gradients(loss_value)
      if issync == 1:
        # Synchronous mode
        rep_op = tf.train.SyncReplicasOptimizer(
            optimizer,
            replicas_to_aggregate=len(worker_hosts),
            replica_id=FLAGS.task_index,
            total_num_replicas=len(worker_hosts),
            use_locking=True)
        train_op = rep_op.apply_gradients(grads_and_vars,
                                          global_step=global_step)
        init_token_op = rep_op.get_init_tokens_op()
        chief_queue_runner = rep_op.get_chief_queue_runner()
      else:
        # Asynchronous mode
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
      init_op = tf.initialize_all_variables()
      saver = tf.train.Saver()
      tf.summary.scalar('cost', loss_value)
      summary_op = tf.summary.merge_all()
    sv = tf.train.Supervisor(is_chief=(FLAGS.task_index == 0),
                             logdir="./alexnet_checkpoint",
                             init_op=init_op,
                             summary_op=None,
                             saver=saver,
                             global_step=global_step,
                             save_model_secs=60)
    with sv.prepare_or_wait_for_session(server.target) as sess:
      # Sync
      if FLAGS.task_index == 0 and issync == 1:
        sv.start_queue_runners(sess, [chief_queue_runner])
        sess.run(init_token_op)
      step = 0
      while not sv.should_stop():
        try:
          start_time = time.time()
          _, loss_v, step = sess.run([train_op, loss_value, global_step])
          if step > 1000:
            break
          duration = time.time() - start_time
          if step >= 10:
            if not step % 10:
              print('%s: step %d, duration = %.3f'
                    % (datetime.now(), step, duration))
        except tf.errors.OutOfRangeError:
          print('Done training -- epoch limit reached')
    sv.stop()
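The `images, labels = ...` line above elides the input pipeline. For completeness, here is a minimal sketch of reading these shards with the TF 0.11 queue-based API; the feature keys ('image_raw', 'label') and the file pattern are my assumptions, since the post does not show the actual reader.

# Hypothetical input pipeline for the shards listed earlier.
import tensorflow as tf

def read_inputs(data_dir, batch_size=64):
  files = tf.train.match_filenames_once(data_dir + '/train_224_*.tfrecords')
  filename_queue = tf.train.string_input_producer(files)
  reader = tf.TFRecordReader()
  _, serialized = reader.read(filename_queue)
  features = tf.parse_single_example(serialized, features={
      'image_raw': tf.FixedLenFeature([], tf.string),  # assumed key
      'label': tf.FixedLenFeature([], tf.int64),       # assumed key
  })
  image = tf.decode_raw(features['image_raw'], tf.uint8)
  image = tf.reshape(image, [224, 224, 3])
  image = tf.cast(image, tf.float32)
  label = tf.cast(features['label'], tf.int32)
  # Batching queue; the Supervisor's queue runners will feed it.
  return tf.train.shuffle_batch([image, label], batch_size=batch_size,
                                capacity=2000, min_after_dequeue=1000)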


Test 1: the AlexNet model, based on the reference here.

def print_activations(t):
  print(t.op.name, ' ', t.get_shape().as_list())


def inference(images):
  """Build the AlexNet model.

  Args:
    images: Images Tensor
  Returns:
    pool5: the last Tensor in the convolutional component of AlexNet.
    parameters: a list of Tensors corresponding to the weights and biases of
        the AlexNet model.
  """
  parameters = []
  # conv1
  with tf.name_scope('conv1') as scope:
    kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 64], dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(bias, name=scope)
    print_activations(conv1)
    parameters += [kernel, biases]

  # lrn1
  # TODO(shlens, jiayq): Add a GPU version of local response normalization.

  # pool1
  pool1 = tf.nn.max_pool(conv1,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool1')
  print_activations(pool1)

  # conv2
  with tf.name_scope('conv2') as scope:
    kernel = tf.Variable(tf.truncated_normal([5, 5, 64, 192], dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[192], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
  print_activations(conv2)

  # pool2
  pool2 = tf.nn.max_pool(conv2,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool2')
  print_activations(pool2)

  # conv3
  with tf.name_scope('conv3') as scope:
    kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384],
                                             dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv3 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
    print_activations(conv3)

  # conv4
  with tf.name_scope('conv4') as scope:
    kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256],
                                             dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv4 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
    print_activations(conv4)

  # conv5
  with tf.name_scope('conv5') as scope:
    kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256],
                                             dtype=tf.float32,
                                             stddev=1e-1), name='weights')
    conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding='SAME')
    biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                         trainable=True, name='biases')
    bias = tf.nn.bias_add(conv, biases)
    conv5 = tf.nn.relu(bias, name=scope)
    parameters += [kernel, biases]
    print_activations(conv5)

  # pool5
  pool5 = tf.nn.max_pool(conv5,
                         ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1],
                         padding='VALID',
                         name='pool5')
  print_activations(pool5)

  return pool5, parameters

Since the crash produces no diagnostic output, I couldn't tell which step was failing, so I added a print after every step and found that the error occurs at this line:

saver = tf.train.Saver()

As soon as execution reaches this line, the process suddenly dies with Segmentation fault (core dumped).
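A generic way to get more information out of a crash like this (my suggestion, not something from the original post): enable faulthandler before importing tensorflow so Python prints its own stack on SIGSEGV, and open the core file with gdb (gdb $(which python) core, then bt) to see which native frame died.

# Dump the Python-level stack when the process receives SIGSEGV. Enable it
# first thing, before importing tensorflow. (faulthandler is stdlib on
# Python 3; pip install faulthandler on Python 2.)
import faulthandler
faulthandler.enable()

import tensorflow as tf  # the crash, if any, now prints a traceback first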


Test 2: the AlexNet model again, based on the reference here; this is the alexnet_v2 model from the slim module.

# Helper used below (defined in the slim model files):
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)


def alexnet_v2_arg_scope(weight_decay=0.0005):
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      biases_initializer=tf.constant_initializer(0.1),
                      weights_regularizer=slim.l2_regularizer(weight_decay)):
    with slim.arg_scope([slim.conv2d], padding='SAME'):
      with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
        return arg_sc


def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
  with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=[end_points_collection]):
      net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
                        scope='conv1')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
      net = slim.conv2d(net, 192, [5, 5], scope='conv2')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
      net = slim.conv2d(net, 384, [3, 3], scope='conv3')
      net = slim.conv2d(net, 384, [3, 3], scope='conv4')
      net = slim.conv2d(net, 256, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')
      # Use conv2d instead of fully_connected layers.
      with slim.arg_scope([slim.conv2d],
                          weights_initializer=trunc_normal(0.005),
                          biases_initializer=tf.constant_initializer(0.1)):
        net = slim.conv2d(net, 4096, [5, 5], padding='VALID',
                          scope='fc6')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')
        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7')
        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          biases_initializer=tf.zeros_initializer,
                          scope='fc8')
      # Convert end_points_collection into an end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points

The same Segmentation fault (core dumped) again; this time the crash is at this line:

        net = slim.conv2d(net, num_classes, [1, 1],
                          activation_fn=None,
                          normalizer_fn=None,
                          biases_initializer=tf.zeros_initializer(),
                          scope='fc8')
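One way to check whether the crash comes from graph construction itself rather than the distributed setup is to build the model in a plain local process. This is my own sketch, using the functions defined above:

# Standalone sanity check: build the graph with a placeholder, with no
# cluster, Supervisor, or Saver involved. If this alone segfaults, the
# distributed setup is not the culprit.
import tensorflow as tf
slim = tf.contrib.slim

images = tf.placeholder(tf.float32, [32, 224, 224, 3])
with slim.arg_scope(alexnet_v2_arg_scope()):
  logits, end_points = alexnet_v2(images, num_classes=1000)
print('graph construction finished without crashing')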


Test 3: the GoogLeNet model, based on the reference here; this is the inception_v1 model from the slim module.

# Helper used below (defined in the slim model files):
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)


def inception_v1_arg_scope(weight_decay=0.00004,
                           stddev=0.1,
                           batch_norm_var_collection='moving_vars'):
  batch_norm_params = {
      'decay': 0.9997,
      'epsilon': 0.001,
      'updates_collections': tf.GraphKeys.UPDATE_OPS,
      'variables_collections': {
          'beta': None,
          'gamma': None,
          'moving_mean': [batch_norm_var_collection],
          'moving_variance': [batch_norm_var_collection],
      }
  }
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      weights_regularizer=slim.l2_regularizer(weight_decay)):
    with slim.arg_scope(
        [slim.conv2d],
        weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
        activation_fn=tf.nn.relu,
        normalizer_fn=slim.batch_norm,
        normalizer_params=batch_norm_params) as sc:
      return sc


def inception_v1_base(inputs,
                      final_endpoint='Mixed_5c',
                      scope='InceptionV1'):
  end_points = {}
  with tf.variable_scope(scope, 'InceptionV1', [inputs]):
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=trunc_normal(0.01)):
      with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                          stride=1, padding='SAME'):
        end_point = 'Conv2d_1a_7x7'
        net = slim.conv2d(inputs, 64, [7, 7], stride=2, scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'MaxPool_2a_3x3'
        net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Conv2d_2b_1x1'
        net = slim.conv2d(net, 64, [1, 1], scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Conv2d_2c_3x3'
        net = slim.conv2d(net, 192, [3, 3], scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'MaxPool_3a_3x3'
        net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_3b'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 128, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 32, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_3c'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 192, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'MaxPool_4a_3x3'
        net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_4b'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 208, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 48, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_4c'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_4d'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 256, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_4e'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 144, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 288, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_4f'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'MaxPool_5a_2x2'
        net = slim.max_pool2d(net, [2, 2], stride=2, scope=end_point)
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_5b'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0a_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, values=[branch_0, branch_1, branch_2, branch_3])
        end_points[end_point] = net
        if final_endpoint == end_point: return net, end_points

        end_point = 'Mixed_5c'
        with tf.variable_scope(end_point):
          with tf.variable_scope('Branch_0'):
            branch_0 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_0a_1x1')
          with tf.variable_scope('Branch_1'):
            branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
            branch_1 = slim.conv2d(branch_1, 384, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_2'):
            branch_2 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0a_1x1')
            branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')
          with tf.variable_scope('Branch_3'):
            branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
            branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
          net = tf.concat(3, values=[branch_0, branch_1, branch_2, branch_3])
        return net, end_points


def inception_v1(inputs,
                 num_classes=1000,
                 is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV1'):
  with tf.variable_scope(scope, 'InceptionV1', [inputs, num_classes],
                         reuse=reuse) as scope:
    with slim.arg_scope([slim.batch_norm, slim.dropout],
                        is_training=is_training):
      net, end_points = inception_v1_base(inputs, scope=scope)
      with tf.variable_scope('Logits'):
        net = slim.avg_pool2d(net, [7, 7], stride=1, scope='AvgPool_0a_7x7')
        net = slim.dropout(net, dropout_keep_prob, scope='Dropout_0b')
        logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                             normalizer_fn=None, scope='Conv2d_0c_1x1')
        if spatial_squeeze:
          logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
        end_points['Logits'] = logits
        end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
  return logits, end_points

Yes, the same error again. This time it occurs in the end_point = 'Mixed_3b' section, at this line:

branch_1 = slim.conv2d(branch_1, 128, [3, 3], scope='Conv2d_0b_3x3')
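As a sketch of how one might isolate this (again my own suggestion, not from the original post): the failing 3x3 convolution can be reproduced on its own, outside the inception graph, to see whether slim.conv2d itself crashes on this machine.

# Minimal repro attempt for the failing line. The input shape is an
# assumption based on where Mixed_3b sits in the network: the 1x1 conv
# before this line produces 96 channels at 28x28.
import numpy as np
import tensorflow as tf
slim = tf.contrib.slim

inp = tf.placeholder(tf.float32, [1, 28, 28, 96])
out = slim.conv2d(inp, 128, [3, 3], scope='Conv2d_0b_3x3')
with tf.Session() as sess:
  sess.run(tf.initialize_all_variables())
  result = sess.run(out, {inp: np.random.rand(1, 28, 28, 96).astype(np.float32)})
  print(result.shape)  # expect (1, 28, 28, 128)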


I've also asked on Stack Overflow, but... my English is poor and the description there is a mess, so no one has answered yet.
