# queue_runner_impl.py
#
# Source: Internet | Published: Apache subdomain configuration | Editor: Program Blog Network | Time: 2024/06/08 17:30
"""Create threads to run multiple enqueue ops."""from __future__ import absolute_importfrom __future__ import divisionfrom __future__ import print_functionimport threadingimport weakreffrom tensorflow.core.protobuf import queue_runner_pb2from tensorflow.python.framework import errorsfrom tensorflow.python.framework import opsfrom tensorflow.python.platform import tf_logging as loggingclass QueueRunner(object):  """Holds a list of enqueue operations for a queue, each to be run in a thread.  保存一个队列的入队操作列表,每个队列在线程中运行。    Queues are a convenient TensorFlow mechanism to compute tensors  asynchronously using multiple threads. For example in the canonical 'Input  Reader' setup one set of threads generates filenames in a queue; a second set  of threads read records from the files, processes them, and enqueues tensors  on a second queue; a third set of threads dequeues these input records to  construct batches and runs them through training operations.  队列是一种方便的TensorFlow机制,可以使用多个线程异步计算张量。   例如在典型的“输入读取器”设置中,一组线程在队列中生成文件名;   第二组线程从文件读取记录,处理它们,并在第二个队列中排列张量;   第三组线程将出口这些输入记录以构建批次并通过训练操作运行它们。  There are several delicate issues when running multiple threads that way:  closing the queues in sequence as the input is exhausted, correctly catching  and reporting exceptions, etc.  运行多个线程时,有几个棘手的问题:当输入用尽时顺序关闭队列,正确捕获和报告异常等。  The `QueueRunner`, combined with the `Coordinator`, helps handle these issues.  `QueueRunner` 与 `Coordinator` 相结合,可以帮助处理这些问题。  """  def __init__(self, queue=None, enqueue_ops=None, close_op=None,               cancel_op=None, queue_closed_exception_types=None,               queue_runner_def=None, import_scope=None):    """Create a QueueRunner.    On construction the `QueueRunner` adds an op to close the queue.  That op    will be run if the enqueue ops raise exceptions.    在创建 `QueueRunner` 添加一个OP来关闭队列。如果入队操作抛出异常,这个OP将会运行。    When you later call the `create_threads()` method, the `QueueRunner` will    create one thread for each op in `enqueue_ops`.  
Each thread will run its    enqueue op in parallel with the other threads.  The enqueue ops do not have    to all be the same op, but it is expected that they all enqueue tensors in    `queue`.    当以后调用 `create_threads()` 方法时,`QueueRunner`将为`enqueue_ops`中的每个op创建一个线程。    每个线程将与其他线程并行运行其入队操作。 入队操作并不一定都是一样的,但预计他们都会入队队列中的张量。        Args:      queue: A `Queue`.      enqueue_ops: List of enqueue ops to run in threads later.      close_op: Op to close the queue. Pending enqueue ops are preserved.      cancel_op: Op to close the queue and cancel pending enqueue ops.      queue_closed_exception_types: Optional tuple of Exception types that        indicate that the queue has been closed when raised during an enqueue        operation.  Defaults to `(tf.errors.OutOfRangeError,)`.  Another common        case includes `(tf.errors.OutOfRangeError, tf.errors.CancelledError)`,        when some of the enqueue ops may dequeue from other Queues.      queue_runner_def: Optional `QueueRunnerDef` protocol buffer. If specified,        recreates the QueueRunner from its contents. `queue_runner_def` and the        other arguments are mutually exclusive.       import_scope: Optional `string`. Name scope to add. Only used when        initializing from protocol buffer. 可选的 `string`。    Args:      queue: A `Queue`.      enqueue_ops: 稍后在线程中运行的入队操作列表。      close_op: 关闭队列操作。 待定的入队操作被保留。      cancel_op: 关闭队列并取消挂起的排队操作。      queue_closed_exception_types: 可选的 Exception 类型的元组,表示在入队操作期间引发队列已关闭。       默认为 `(tf.errors.OutOfRangeError,)`。       另一个常见的情况包括 `(tf.errors.OutOfRangeError, tf.errors.CancelledError)` ,当某些入队操作可能会从其他队列出队。      queue_runner_def: 可选的 `QueueRunnerDef` 协议缓冲区。       如果指定,则从其内容重新创建 QueueRunner。 `queue_runner_def` 和其他参数是互斥的。      import_scope: 可选的 `string`。 添加名称范围。 仅在从协议缓冲区初始化时使用。        Raises:      ValueError: If both `queue_runner_def` and `queue` are both specified.      ValueError: If `queue` or `enqueue_ops` are not provided when not        restoring from `queue_runner_def`.    
Raises:       ValueError:如果同时指定`queue_runner_def`和`queue`两者。       ValueError:如果没有从`queue_runner_def'恢复时没有提供`queue`或`enqueue_ops`。        """    if queue_runner_def:      if queue or enqueue_ops:        raise ValueError("queue_runner_def and queue are mutually exclusive.")      self._init_from_proto(queue_runner_def,                            import_scope=import_scope)    else:      self._init_from_args(          queue=queue, enqueue_ops=enqueue_ops,          close_op=close_op, cancel_op=cancel_op,          queue_closed_exception_types=queue_closed_exception_types)    # Protect the count of runs to wait for.    self._lock = threading.Lock()    # A map from a session object to the number of outstanding queue runner    # threads for that session.    self._runs_per_session = weakref.WeakKeyDictionary()    # List of exceptions raised by the running threads.    self._exceptions_raised = []  def _init_from_args(self, queue=None, enqueue_ops=None, close_op=None,                      cancel_op=None, queue_closed_exception_types=None):    """Create a QueueRunner from arguments.    从参数创建 QueueRunner。    Args:      queue: A `Queue`.      enqueue_ops: List of enqueue ops to run in threads later.      close_op: Op to close the queue. Pending enqueue ops are preserved.      cancel_op: Op to close the queue and cancel pending enqueue ops.      queue_closed_exception_types: Tuple of exception types, which indicate        the queue has been safely closed.    Raises:      ValueError: If `queue` or `enqueue_ops` are not provided when not        restoring from `queue_runner_def`.      TypeError: If `queue_closed_exception_types` is provided, but is not        a non-empty tuple of error types (subclasses of `tf.errors.OpError`).    
"""    if not queue or not enqueue_ops:      raise ValueError("Must provide queue and enqueue_ops.")    self._queue = queue    self._enqueue_ops = enqueue_ops    self._close_op = close_op    self._cancel_op = cancel_op    if queue_closed_exception_types is not None:      if (not isinstance(queue_closed_exception_types, tuple)          or not queue_closed_exception_types          or not all(issubclass(t, errors.OpError)                     for t in queue_closed_exception_types)):        raise TypeError(            "queue_closed_exception_types, when provided, "            "must be a tuple of tf.error types, but saw: %s"            % queue_closed_exception_types)    self._queue_closed_exception_types = queue_closed_exception_types    # Close when no more will be produced, but pending enqueues should be    # preserved.    if self._close_op is None:      self._close_op = self._queue.close()    # Close and cancel pending enqueues since there was an error and we want    # to unblock everything so we can cleanly exit.    if self._cancel_op is None:      self._cancel_op = self._queue.close(cancel_pending_enqueues=True)    if not self._queue_closed_exception_types:      self._queue_closed_exception_types = (errors.OutOfRangeError,)    else:      self._queue_closed_exception_types = tuple(          self._queue_closed_exception_types)  def _init_from_proto(self, queue_runner_def, import_scope=None):    """Create a QueueRunner from `QueueRunnerDef`.    从`QueueRunnerDef`创建QueueRunner。    Args:      queue_runner_def: Optional `QueueRunnerDef` protocol buffer.      import_scope: Optional `string`. Name scope to add.    
"""    assert isinstance(queue_runner_def, queue_runner_pb2.QueueRunnerDef)    g = ops.get_default_graph()    self._queue = g.as_graph_element(        ops.prepend_name_scope(queue_runner_def.queue_name, import_scope))    self._enqueue_ops = [g.as_graph_element(        ops.prepend_name_scope(op, import_scope))                         for op in queue_runner_def.enqueue_op_name]    self._close_op = g.as_graph_element(ops.prepend_name_scope(        queue_runner_def.close_op_name, import_scope))    self._cancel_op = g.as_graph_element(ops.prepend_name_scope(        queue_runner_def.cancel_op_name, import_scope))    self._queue_closed_exception_types = tuple(        errors.exception_type_from_error_code(code)        for code in queue_runner_def.queue_closed_exception_types)    # Legacy support for old QueueRunnerDefs created before this field    # was added.    if not self._queue_closed_exception_types:      self._queue_closed_exception_types = (errors.OutOfRangeError,)  @property  def queue(self):    return self._queue  @property  def enqueue_ops(self):    return self._enqueue_ops  @property  def close_op(self):    return self._close_op  @property  def cancel_op(self):    return self._cancel_op  @property  def queue_closed_exception_types(self):    return self._queue_closed_exception_types  @property  def exceptions_raised(self):    """Exceptions raised but not handled by the `QueueRunner` threads.    `QueueRunner` 线程引发异常但未处理。    Exceptions raised in queue runner threads are handled in one of two ways    depending on whether or not a `Coordinator` was passed to    `create_threads()`:    * With a `Coordinator`, exceptions are reported to the coordinator and      forgotten by the `QueueRunner`.    * Without a `Coordinator`, exceptions are captured by the `QueueRunner` and      made available in this `exceptions_raised` property.    Returns:      A list of Python `Exception` objects.  The list is empty if no exception      was captured.  
(No exceptions are captured when using a Coordinator.)    """    return self._exceptions_raised  @property  def name(self):    """The string name of the underlying Queue."""    return self._queue.name  # pylint: disable=broad-except  def _run(self, sess, enqueue_op, coord=None):    """Execute the enqueue op in a loop, close the queue in case of error.    在循环中执行入队操作,在发生错误时关闭队列。    Args:      sess: A Session.      enqueue_op: The Operation to run.      coord: Optional Coordinator object for reporting errors and checking        for stop conditions.    """    decremented = False    try:      while True:        if coord and coord.should_stop():          break        try:          sess.run(enqueue_op)        except self._queue_closed_exception_types:  # pylint: disable=catching-non-exception          # This exception indicates that a queue was closed.          with self._lock:            self._runs_per_session[sess] -= 1            decremented = True            if self._runs_per_session[sess] == 0:              try:                sess.run(self._close_op)              except Exception as e:                # Intentionally ignore errors from close_op.                logging.vlog(1, "Ignored exception: %s", str(e))            return    except Exception as e:      # This catches all other exceptions.      if coord:        coord.request_stop(e)      else:        logging.error("Exception in QueueRunner: %s", str(e))        with self._lock:          self._exceptions_raised.append(e)        raise    finally:      # Make sure we account for all terminations: normal or errors.      if not decremented:        with self._lock:          self._runs_per_session[sess] -= 1  def _close_on_stop(self, sess, cancel_op, coord):    """Close the queue when the Coordinator requests stop.    Args:      sess: A Session.      cancel_op: The Operation to run.      coord: Coordinator.    
"""    coord.wait_for_stop()    try:      sess.run(cancel_op)    except Exception as e:      # Intentionally ignore errors from cancel_op.      logging.vlog(1, "Ignored exception: %s", str(e))  # pylint: enable=broad-except  def create_threads(self, sess, coord=None, daemon=False, start=False):    """Create threads to run the enqueue ops for the given session.    创建线程来运行给定会话的入队操作。    This method requires a session in which the graph was launched.  It creates    a list of threads, optionally starting them.  There is one thread for each    op passed in `enqueue_ops`.    此方法需要已经启动了的graph中的会话。它创建一个线程列表,可以选择启动它们。    在 `enqueue_ops` 中,每个操作都有一个线程。    The `coord` argument is an optional coordinator that the threads will use    to terminate together and report exceptions.  If a coordinator is given,    this method starts an additional thread to close the queue when the    coordinator requests a stop.    `coord` 参数是可选的协调器,线程将终止在一起并报告异常。    如果给定了一个协调器,当协调器请求停止时,此方法启动一个额外的线程来关闭队列。    If previously created threads for the given session are still running, no    new threads will be created.    如果先前创建的给定会话线程仍在运行,则不会创建新线程。    Args:      sess: A `Session`.      coord: Optional `Coordinator` object for reporting errors and checking        stop conditions.      daemon: Boolean.  If `True` make the threads daemon threads.      start: Boolean.  If `True` starts the threads.  If `False` the        caller must call the `start()` method of the returned threads.    Returns:      A list of threads.    """    with self._lock:      try:        if self._runs_per_session[sess] > 0:          # Already started: no new threads to return.          return []      except KeyError:        # We haven't seen this session yet.        
pass      self._runs_per_session[sess] = len(self._enqueue_ops)      self._exceptions_raised = []    ret_threads = [threading.Thread(target=self._run, args=(sess, op, coord))                   for op in self._enqueue_ops]    if coord:      ret_threads.append(threading.Thread(target=self._close_on_stop,                                          args=(sess, self._cancel_op, coord)))    for t in ret_threads:      if coord:        coord.register_thread(t)      if daemon:        t.daemon = True      if start:        t.start()    return ret_threads  def to_proto(self, export_scope=None):    """Converts this `QueueRunner` to a `QueueRunnerDef` protocol buffer.    Args:      export_scope: Optional `string`. Name scope to remove.    Returns:      A `QueueRunnerDef` protocol buffer, or `None` if the `Variable` is not in      the specified name scope.    """    if (export_scope is None or        self.queue.name.startswith(export_scope)):      queue_runner_def = queue_runner_pb2.QueueRunnerDef()      queue_runner_def.queue_name = ops.strip_name_scope(          self.queue.name, export_scope)      for enqueue_op in self.enqueue_ops:        queue_runner_def.enqueue_op_name.append(            ops.strip_name_scope(enqueue_op.name, export_scope))      queue_runner_def.close_op_name = ops.strip_name_scope(          self.close_op.name, export_scope)      queue_runner_def.cancel_op_name = ops.strip_name_scope(          self.cancel_op.name, export_scope)      queue_runner_def.queue_closed_exception_types.extend([          errors.error_code_from_exception_type(cls)          for cls in self._queue_closed_exception_types])      return queue_runner_def    else:      return None  @staticmethod  def from_proto(queue_runner_def, import_scope=None):    """Returns a `QueueRunner` object created from `queue_runner_def`."""    return QueueRunner(queue_runner_def=queue_runner_def,                       import_scope=import_scope)def add_queue_runner(qr, collection=ops.GraphKeys.QUEUE_RUNNERS):  
"""Adds a `QueueRunner` to a collection in the graph.  在graph集合中增加了一个 `QueueRunner`。    When building a complex model that uses many queues it is often difficult to  gather all the queue runners that need to be run.  This convenience function  allows you to add a queue runner to a well known collection in the graph.  当使用多个队列,建立一个复杂的模型,往往是很难收集所有需要运行的 queue runners。  这个便利功能允许你在 graph 中添加一个队列的集合给一个众所周知的集合。  The companion method `start_queue_runners()` can be used to start threads for  all the collected queue runners.  伴随的方法` start_queue_runners() `可用于启动线程的所有收集到的排队者。  Args:    qr: A `QueueRunner`.    collection: A `GraphKey` specifying the graph collection to add      the queue runner to.  Defaults to `GraphKeys.QUEUE_RUNNERS`.  """  ops.add_to_collection(collection, qr)def start_queue_runners(sess=None, coord=None, daemon=True, start=True,                        collection=ops.GraphKeys.QUEUE_RUNNERS):  """Starts all queue runners collected in the graph.  开始在图中收集的所有 queue runners。  This is a companion method to `add_queue_runner()`.  It just starts  threads for all queue runners collected in the graph.  It returns  the list of all threads.  这是 `add_queue_runner()` 的配套方法。 它只是为 graph 中收集的所有 queue runners 启动线程。  它返回所有线程的列表。  Args:    sess: `Session` used to run the queue ops.  Defaults to the      default session.    coord: Optional `Coordinator` for coordinating the started threads.    daemon: Whether the threads should be marked as `daemons`, meaning      they don't block program exit.    start: Set to `False` to only create the threads, not start them.    collection: A `GraphKey` specifying the graph collection to      get the queue runners from.  Defaults to `GraphKeys.QUEUE_RUNNERS`.  Returns:    A list of threads.  """  if sess is None:    sess = ops.get_default_session()    if not sess:      raise ValueError("Cannot start queue runners: No default session is "                       "registered. 
Use `with sess.as_default()` or pass an "                       "explicit session to tf.start_queue_runners(sess=sess)")  with sess.graph.as_default():    threads = []    for qr in ops.get_collection(collection):      threads.extend(qr.create_threads(sess, coord=coord, daemon=daemon,                                       start=start))  return threadsops.register_proto_function(ops.GraphKeys.QUEUE_RUNNERS,                            proto_type=queue_runner_pb2.QueueRunnerDef,                            to_proto=QueueRunner.to_proto,                            from_proto=QueueRunner.from_proto)

# 0 0
# Original | Fans | Clicks