SegLink code: ICDAR2015_to_tfrecords


The key code is convert_to_example. The most important data it serializes are:
1) the eight oriented-box coordinates of each GT box (x1, y1, ..., x4, y4)
2) the four axis-aligned coordinates of each GT box (xmin, ymin, xmax, ymax)
3) the encoded image itself
A sketch of how these values are read out of the ICDAR2015 ground-truth files follows the list.
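Below is a minimal parsing sketch, not the author's code. Each line of an ICDAR2015 ground-truth file has the form x1,y1,x2,y2,x3,y3,x4,y4,transcription, and a transcription of ### marks an ignored ("don't care") region. Coordinates are normalized by the image size to land in [0, 1], matching the docstring of convert_to_example; the function name and the choice to normalize at parse time are assumptions.

def parse_icdar2015_gt(gt_path, image_w, image_h):
    """Hypothetical parser: returns the lists expected by convert_to_example."""
    oriented_bboxes, bboxes, labels, labels_text, ignored = [], [], [], [], []
    with open(gt_path, encoding='utf-8-sig') as f:  # ICDAR2015 gt files start with a BOM
        for line in f:
            parts = line.strip().split(',')
            xs = [float(v) / image_w for v in parts[0:8:2]]  # x1, x2, x3, x4 in [0, 1]
            ys = [float(v) / image_h for v in parts[1:8:2]]  # y1, y2, y3, y4 in [0, 1]
            text = ','.join(parts[8:])  # the transcription itself may contain commas
            oriented_bboxes.append([v for p in zip(xs, ys) for v in p])  # x1,y1,...,x4,y4
            bboxes.append([min(xs), min(ys), max(xs), max(ys)])          # enclosing rectangle
            labels.append(1)                            # single foreground class: 'text'
            labels_text.append(text.encode('utf-8'))
            ignored.append(int(text == '###'))          # '###' == don't-care region
    return labels, ignored, labels_text, bboxes, oriented_bboxes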

With those lists in hand, convert_to_example packs everything into a tf.train.Example proto. The int64_feature, float_feature, and bytes_feature helpers are the usual tf.train.Feature wrappers, reproduced here so the snippet is self-contained; util is the project's separate helper library for path and string utilities.

import numpy as np
import tensorflow as tf
import util  # project helper library (util.io, util.str)

slim = tf.contrib.slim


def int64_feature(values):
    if not isinstance(values, list):
        values = [values]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))


def float_feature(values):
    if not isinstance(values, list):
        values = [values]
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))


def bytes_feature(values):
    if not isinstance(values, list):
        values = [values]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))


def convert_to_example(image_data, filename, labels, ignored, labels_text,
                       bboxes, oriented_bboxes, shape):
    """Build an Example proto for an image example.

    Args:
      image_data: string, JPEG encoding of RGB image;
      labels: list of integers, identifier for the ground truth;
      labels_text: list of strings, human-readable labels;
      oriented_bboxes: list of oriented bounding boxes; each box is a list of
          floats in [0, 1], specifying [x1, y1, x2, y2, x3, y3, x4, y4];
      bboxes: list of axis-aligned rectangles, [xmin, ymin, xmax, ymax].

    Returns:
      Example proto
    """
    image_format = b'JPEG'
    oriented_bboxes = np.asarray(oriented_bboxes)
    bboxes = np.asarray(bboxes)
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/shape': int64_feature(list(shape)),
        'image/object/bbox/xmin': float_feature(list(bboxes[:, 0])),
        'image/object/bbox/ymin': float_feature(list(bboxes[:, 1])),
        'image/object/bbox/xmax': float_feature(list(bboxes[:, 2])),
        'image/object/bbox/ymax': float_feature(list(bboxes[:, 3])),
        'image/object/bbox/x1': float_feature(list(oriented_bboxes[:, 0])),
        'image/object/bbox/y1': float_feature(list(oriented_bboxes[:, 1])),
        'image/object/bbox/x2': float_feature(list(oriented_bboxes[:, 2])),
        'image/object/bbox/y2': float_feature(list(oriented_bboxes[:, 3])),
        'image/object/bbox/x3': float_feature(list(oriented_bboxes[:, 4])),
        'image/object/bbox/y3': float_feature(list(oriented_bboxes[:, 5])),
        'image/object/bbox/x4': float_feature(list(oriented_bboxes[:, 6])),
        'image/object/bbox/y4': float_feature(list(oriented_bboxes[:, 7])),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/ignored': int64_feature(ignored),
        'image/format': bytes_feature(image_format),
        'image/filename': bytes_feature(filename),
        'image/encoded': bytes_feature(image_data)}))
    return example
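To actually produce a .tfrecord file, each returned Example is serialized and written with a TFRecordWriter. A minimal sketch, assuming the hypothetical parse_icdar2015_gt above; the output name, the samples list, and the get_image_size helper are placeholders:

with tf.python_io.TFRecordWriter('icdar2015_train.tfrecord') as writer:
    for image_path, gt_path in samples:             # hypothetical (image, gt-file) pairs
        image_data = open(image_path, 'rb').read()  # raw JPEG bytes, stored as-is
        h, w = get_image_size(image_path)           # hypothetical helper (e.g. via cv2 or PIL)
        labels, ignored, labels_text, bboxes, oriented_bboxes = \
            parse_icdar2015_gt(gt_path, w, h)
        example = convert_to_example(image_data, image_path.encode(), labels,
                                     ignored, labels_text, bboxes,
                                     oriented_bboxes, [h, w, 3])
        writer.write(example.SerializeToString())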
The companion get_split then tells TF-slim how those records are decoded back into tensors at training time:

def get_split(split_name, dataset_dir, file_pattern, num_samples, reader=None):
    dataset_dir = util.io.get_absolute_path(dataset_dir)
    if util.str.contains(file_pattern, '%'):
        file_pattern = util.io.join_path(dataset_dir, file_pattern % split_name)
    else:
        file_pattern = util.io.join_path(dataset_dir, file_pattern)
    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x1': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x2': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x3': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/x4': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y1': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y2': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y3': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/y4': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ignored': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape': slim.tfexample_decoder.Tensor('image/shape'),
        'filename': slim.tfexample_decoder.Tensor('image/filename'),
        'object/bbox': slim.tfexample_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/oriented_bbox/x1': slim.tfexample_decoder.Tensor('image/object/bbox/x1'),
        'object/oriented_bbox/x2': slim.tfexample_decoder.Tensor('image/object/bbox/x2'),
        'object/oriented_bbox/x3': slim.tfexample_decoder.Tensor('image/object/bbox/x3'),
        'object/oriented_bbox/x4': slim.tfexample_decoder.Tensor('image/object/bbox/x4'),
        'object/oriented_bbox/y1': slim.tfexample_decoder.Tensor('image/object/bbox/y1'),
        'object/oriented_bbox/y2': slim.tfexample_decoder.Tensor('image/object/bbox/y2'),
        'object/oriented_bbox/y3': slim.tfexample_decoder.Tensor('image/object/bbox/y3'),
        'object/oriented_bbox/y4': slim.tfexample_decoder.Tensor('image/object/bbox/y4'),
        'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/ignored': slim.tfexample_decoder.Tensor('image/object/bbox/ignored')
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)
    labels_to_names = {0: 'background', 1: 'text'}
    items_to_descriptions = {
        'image': 'A color image of varying height and width.',
        'shape': 'Shape of the image',
        'object/bbox': 'A list of bounding boxes, one per each object.',
        'object/label': 'A list of labels, one per each object.',
    }
    # Dataset description that a slim DatasetDataProvider can consume.
    return slim.dataset.Dataset(
        data_sources=file_pattern,   # where the tfrecord files live
        reader=reader,
        decoder=decoder,
        num_samples=num_samples,
        items_to_descriptions=items_to_descriptions,
        num_classes=2,
        labels_to_names=labels_to_names)
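On the reading side, the slim.dataset.Dataset returned by get_split is normally consumed through a DatasetDataProvider, which queues records, decodes them with the decoder above, and exposes the handler names as fetchable items. A minimal sketch; the directory, file pattern, and num_samples value are placeholders (the ICDAR2015 training set has 1,000 images):

dataset = get_split('train', '/path/to/tfrecords', 'icdar2015_%s.tfrecord',
                    num_samples=1000)
provider = slim.dataset_data_provider.DatasetDataProvider(
    dataset, num_readers=2, shuffle=True)
# Item names match the keys of items_to_handlers above.
image, shape, x1, y1 = provider.get(
    ['image', 'shape', 'object/oriented_bbox/x1', 'object/oriented_bbox/y1'])

These are graph ops; evaluating them requires a session with queue runners started (tf.train.start_queue_runners).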