Seglink代码之ICDAR2015_to_tfrecords
来源:互联网 发布:java开发手游 编辑:程序博客网 时间:2024/06/05 21:56
关键性代码在于 convert_to_example。写入 Example 的比较重要的数据有:
1)GT 的八个坐标值(oriented_bboxes,即倾斜四边形四个顶点的 [x1, y1, x2, y2, x3, y3, x4, y4])
2)GT 的矩形框四个坐标值(bboxes,即 [xmin, ymin, xmax, ymax])
3)image 的 JPEG 编码数据
def convert_to_example(image_data, filename, labels, ignored, labels_text, bboxes, oriented_bboxes, shape):
    """Build a tf.train.Example proto for one image and its ground truth.

    Args:
        image_data: string, JPEG encoding of the RGB image.
        filename: the image's file name.
        labels: list of integers, class id for each ground-truth box.
        ignored: list of integers, flags marking boxes to be ignored.
        labels_text: list of strings, human-readable labels.
        bboxes: list of axis-aligned boxes, each [xmin, ymin, xmax, ymax].
        oriented_bboxes: list of oriented boxes, each a list of floats in
            [0, 1] specifying [x1, y1, x2, y2, x3, y3, x4, y4].
        shape: the image shape, e.g. (height, width, channels).

    Returns:
        Example proto.
    """
    image_format = b'JPEG'
    poly = np.asarray(oriented_bboxes)  # (N, 8): four (x, y) corner pairs
    rect = np.asarray(bboxes)           # (N, 4): xmin, ymin, xmax, ymax

    feature = {'image/shape': int64_feature(list(shape))}
    # Axis-aligned rectangle: one float list per coordinate column.
    for col, key in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
        feature['image/object/bbox/' + key] = float_feature(list(rect[:, col]))
    # Oriented quadrilateral: columns alternate x1, y1, ..., x4, y4.
    for corner in range(4):
        feature['image/object/bbox/x%d' % (corner + 1)] = \
            float_feature(list(poly[:, 2 * corner]))
        feature['image/object/bbox/y%d' % (corner + 1)] = \
            float_feature(list(poly[:, 2 * corner + 1]))
    feature['image/object/bbox/label'] = int64_feature(labels)
    feature['image/object/bbox/label_text'] = bytes_feature(labels_text)
    feature['image/object/bbox/ignored'] = int64_feature(ignored)
    feature['image/format'] = bytes_feature(image_format)
    feature['image/filename'] = bytes_feature(filename)
    feature['image/encoded'] = bytes_feature(image_data)
    return tf.train.Example(features=tf.train.Features(feature=feature))


def get_split(split_name, dataset_dir, file_pattern, num_samples, reader=None):
    """Create a slim Dataset for one split of the ICDAR2015 tfrecords.

    Args:
        split_name: name of the split, substituted into file_pattern when the
            pattern contains a '%' placeholder.
        dataset_dir: directory holding the tfrecord files.
        file_pattern: file name or pattern of the tfrecord source files.
        num_samples: number of examples in this split.
        reader: optional TensorFlow reader class; defaults to
            tf.TFRecordReader so dataset_factory can pass None.

    Returns:
        A slim.dataset.Dataset describing the split (2 classes:
        background / text).
    """
    dataset_dir = util.io.get_absolute_path(dataset_dir)
    if util.str.contains(file_pattern, '%'):
        file_pattern = util.io.join_path(dataset_dir, file_pattern % split_name)
    else:
        file_pattern = util.io.join_path(dataset_dir, file_pattern)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader

    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/filename': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
    }
    # The eight oriented-box coordinates follow the pattern x1..x4, y1..y4.
    for axis in ('x', 'y'):
        for corner in range(1, 5):
            keys_to_features['image/object/bbox/%s%d' % (axis, corner)] = \
                tf.VarLenFeature(dtype=tf.float32)
    keys_to_features['image/object/bbox/ignored'] = tf.VarLenFeature(dtype=tf.int64)
    keys_to_features['image/object/bbox/label'] = tf.VarLenFeature(dtype=tf.int64)

    items_to_handlers = {
        'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape': slim.tfexample_decoder.Tensor('image/shape'),
        'filename': slim.tfexample_decoder.Tensor('image/filename'),
        'object/bbox': slim.tfexample_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
    }
    for axis in ('x', 'y'):
        for corner in range(1, 5):
            coord = '%s%d' % (axis, corner)
            items_to_handlers['object/oriented_bbox/' + coord] = \
                slim.tfexample_decoder.Tensor('image/object/bbox/' + coord)
    items_to_handlers['object/label'] = \
        slim.tfexample_decoder.Tensor('image/object/bbox/label')
    items_to_handlers['object/ignored'] = \
        slim.tfexample_decoder.Tensor('image/object/bbox/ignored')

    decoder = slim.tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)
    labels_to_names = {0: 'background', 1: 'text'}
    items_to_descriptions = {
        'image': 'A color image of varying height and width.',
        'shape': 'Shape of the image',
        'object/bbox': 'A list of bounding boxes, one per each object.',
        'object/label': 'A list of labels, one per each object.',
    }
    # Provider to get the dataset
    return slim.dataset.Dataset(
        data_sources=file_pattern,  # data address
        reader=reader,
        decoder=decoder,
        num_samples=num_samples,
        items_to_descriptions=items_to_descriptions,
        num_classes=2,
        labels_to_names=labels_to_names)
阅读全文
0 0
- Seglink代码之ICDAR2015_to_tfrecords
- seglink代码之 generate_anchors_one layer
- Seglink代码之二 数据增强 distorted_bounding_box_crop
- 论文:seglink
- seglink 环境配置
- seglink 论文阅读
- SegLink on github-Detecting Oriented Text in Natural Images by Linking Segments
- 代码测试之代码调试
- 代码测试之代码调试
- 代码测试之代码调试
- 代码测试之代码调试
- 代码测试之代码调试
- 代码测试之代码调试
- 代码测试之代码调试
- 代码之梦
- 仿真代码之二
- 仿真代码之三
- SQLServer2005SQLCLR代码之权限
- Rhyme/ struts2与servlet的线程安全问题
- openURL:
- log4j日志配置
- angularjs用户管理
- 【Lucas定理】BZOJ4403[序列统计]题解
- Seglink代码之ICDAR2015_to_tfrecords
- 图片懒加载
- [目标检测]Faster RCNN算法详解
- 视觉的显著性
- Referer校验
- 数据结构实验报告(三)
- 电脑常用快捷键
- Android_面试经验总结
- oozie开启hcatUrl依赖hive的元数据