subprocess source code learning - posix - 1

来源:互联网 发布:开元盛世 知乎 编辑:程序博客网 时间:2024/05/21 10:03


Let's start with the initializer of the Popen class, Popen.__init__(). The code looks like this:

    def __init__(self, args, bufsize=0, executable=None,
                 stdin=None, stdout=None, stderr=None,
                 preexec_fn=None, close_fds=False, shell=False,
                 cwd=None, env=None, universal_newlines=False,
                 startupinfo=None, creationflags=0):
        """Create new Popen instance.

        The steps are: validate the platform-specific arguments, resolve
        the stdin/stdout/stderr redirections with ``_get_handles()``,
        launch the child with ``_execute_child()``, and finally wrap the
        parent's ends of any pipes in file objects stored on
        ``self.stdin``, ``self.stdout`` and ``self.stderr``.

        Raises:
            TypeError: if ``bufsize`` is not an ``int`` or ``long``.
            ValueError: if an argument is used on a platform that does
                not support it (see the checks below).
        """
        # Drop already-finished children from the module's bookkeeping
        # before creating a new one (refer to Note 1).
        _cleanup()

        self._child_created = False
        if not isinstance(bufsize, (int, long)):
            raise TypeError("bufsize must be an integer")

        # preexec_fn and close_fds are not supported by Windows
        if mswindows:
            if preexec_fn is not None:
                raise ValueError("preexec_fn is not supported on Windows "
                                 "platforms")
            if close_fds and (stdin is not None or stdout is not None or
                              stderr is not None):
                raise ValueError("close_fds is not supported on Windows "
                                 "platforms if you redirect stdin/stdout/stderr")
        else:
            # POSIX: startupinfo and creationflags are Windows-only
            # options, so reject any non-default value.
            if startupinfo is not None:
                raise ValueError("startupinfo is only supported on Windows "
                                 "platforms")
            if creationflags != 0:
                raise ValueError("creationflags is only supported on Windows "
                                 "platforms")

        # Public attributes start out unset; the pipe attributes are only
        # filled in below when the matching stream uses a pipe.
        self.stdin = None
        self.stdout = None
        self.stderr = None
        self.pid = None
        self.returncode = None
        self.universal_newlines = universal_newlines

        # Input and output objects. The general principle is like
        # this:
        #
        # Parent                   Child
        # ------                   -----
        # p2cwrite   ---stdin--->  p2cread
        # c2pread    <--stdout---  c2pwrite
        # errread    <--stderr---  errwrite
        #
        # On POSIX, the child objects are file descriptors.  On
        # Windows, these are Windows file handles.  The parent objects
        # are file descriptors on both platforms.  The parent objects
        # are None when not using PIPEs. The child objects are None
        # when not redirecting.

        (p2cread, p2cwrite,
         c2pread, c2pwrite,
         errread, errwrite) = self._get_handles(stdin, stdout, stderr)  # refer to Note 2

        # Launch the child process (refer to Note 3).
        self._execute_child(args, executable, preexec_fn, close_fds,
                            cwd, env, universal_newlines,
                            startupinfo, creationflags, shell,
                            p2cread, p2cwrite,
                            c2pread, c2pwrite,
                            errread, errwrite)

        # On Windows the parent's ends are handle objects at this point;
        # convert each one into a C-runtime file descriptor so the
        # os.fdopen() calls below work on both platforms.
        if mswindows:
            if p2cwrite is not None:
                p2cwrite = msvcrt.open_osfhandle(p2cwrite.Detach(), 0)
            if c2pread is not None:
                c2pread = msvcrt.open_osfhandle(c2pread.Detach(), 0)
            if errread is not None:
                errread = msvcrt.open_osfhandle(errread.Detach(), 0)

        # Wrap the parent's pipe ends (p2cwrite, c2pread, errread) in file
        # objects.
        if p2cwrite is not None:
            self.stdin = os.fdopen(p2cwrite, 'wb', bufsize)
        if c2pread is not None:
            if universal_newlines:
                # Mode 'rU' requests universal-newline support: the file is
                # read as text and the line endings '\n', '\r' and '\r\n'
                # are all presented to the program as '\n'.  File objects
                # opened this way also expose a ``newlines`` attribute
                # recording which endings have been seen so far.  If Python
                # was built without universal-newline support, 'U' behaves
                # like normal text mode.
                self.stdout = os.fdopen(c2pread, 'rU', bufsize)
            else:
                self.stdout = os.fdopen(c2pread, 'rb', bufsize)
        if errread is not None:
            if universal_newlines:
                self.stderr = os.fdopen(errread, 'rU', bufsize)
            else:
                self.stderr = os.fdopen(errread, 'rb', bufsize)

Note 1:

Code of _cleanup() is like this:

def _cleanup():
    """Remove finished child processes from the module-level ``_active`` list.

    Every instance still tracked in ``_active`` is polled without
    blocking.  An instance is removed as soon as the poll yields any
    return code at all.  The sign of the code must not matter: a child
    terminated by a signal reports a negative code, and testing
    ``>= 0`` would leave such instances in ``_active`` forever.
    """
    # Iterate over a copy because entries are removed while looping.
    for inst in _active[:]:
        # _deadstate is the value recorded as the return code when the
        # poll itself fails, so such instances are dropped as well
        # (refer to Note 1-1).  sys.maxsize is used instead of the
        # Python-2-only sys.maxint; it exists from Python 2.6 onward.
        res = inst._internal_poll(_deadstate=sys.maxsize)
        if res is not None:
            try:
                _active.remove(inst)
            except ValueError:
                # This can happen if two threads create a new Popen instance.
                # It's harmless that it was already removed, so ignore.
                pass

Note 1-1:

Code of self._internal_poll() is like this, which is defined within class Popen.

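For a finished sub process, this function returns its return code: the exit status (an integer >= 0), or the negated signal number (an integer < 0) if the child was terminated by a signal. For a sub process that has not finished yet, it returns None.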

        def _internal_poll(self, _deadstate=None):
            """Check, without blocking, whether the child has terminated.

            If a return code has already been recorded it is returned as
            is.  Otherwise ``os.waitpid`` is called with ``os.WNOHANG``;
            when it reports this child's pid, the raw status is decoded by
            ``_handle_exitstatus()`` (refer to Note 1-1-1).  If the call
            raises ``os.error`` and ``_deadstate`` is not None,
            ``_deadstate`` is recorded as the return code.

            Returns the ``returncode`` attribute, which is still None when
            nothing has been recorded.
            """
            # Already known: nothing to ask the operating system.
            if self.returncode is not None:
                return self.returncode

            try:
                # With WNOHANG the call returns immediately; the reported
                # pid only equals ours once the child has finished.
                reaped_pid, raw_status = os.waitpid(self.pid, os.WNOHANG)
                if reaped_pid == self.pid:
                    self._handle_exitstatus(raw_status)
            except os.error:
                # The status could not be obtained; fall back to the
                # caller-supplied marker, if there is one.
                if _deadstate is not None:
                    self.returncode = _deadstate

            return self.returncode

Note 1-1-1:

Code of self._handle_exitstatus() is like the following code clips. And this function is implemented within class Popen.

If the child is terminated by a signal, then the return code is set to the negated signal number (for example, -9 for SIGKILL);

if the child exits with system call exit(), then return the parameter of exit(). For example, if the exit statement is "exit(3)", then, 3 will be assigned to self.returncode.

        def _handle_exitstatus(self, sts):
            """Translate a raw wait status into ``self.returncode``.

            ``sts`` is the status value produced by ``os.waitpid``.  A
            child ended by a signal gets the negated signal number; a
            child that exited normally gets its exit status.

            Raises:
                RuntimeError: if the status is neither of the two.
            """
            # Ended by a signal: store the signal number with a minus sign.
            if os.WIFSIGNALED(sts):
                self.returncode = -os.WTERMSIG(sts)
                return

            # Normal exit: store the value the child passed to exit().
            if os.WIFEXITED(sts):
                self.returncode = os.WEXITSTATUS(sts)
                return

            # Should never happen
            raise RuntimeError("Unknown child exit status!")

Note 2:

self._get_handles() is preparing the pipes for the sub process. 

According to the official comments:

        # Parent                   Child        # ------                   -----        # p2cwrite   ---stdin--->  p2cread        # c2pread    <--stdout---  c2pwrite        # errread    <--stderr---  errwrite
we can tell that:

p2cread is used for child to get the input from parent;

p2cwrite is used for parent to send input to child;

c2pread is used by parent to get output from child;

c2pwrite is used by child to send output to parent;

errread is used by parent to get error info from child;

errwrite is used by child to send error info to parent.

Code is like this:

        def _get_handles(self, stdin, stdout, stderr):
            """Work out the descriptors that connect parent and child.

            Each of ``stdin``, ``stdout`` and ``stderr`` may be None (no
            redirection, so both entries for that stream stay None),
            ``PIPE`` (a fresh pipe is created with ``os.pipe()``), an
            integer file descriptor, or an object with a ``fileno()``
            method.  ``stderr`` may additionally be ``STDOUT``, in which
            case it shares the child's stdout descriptor.

            Returns the tuple
            ``(p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite)``.
            """
            def _as_fd(target):
                # An int is used as a descriptor directly; anything else
                # is assumed to be a file-like object.
                if isinstance(target, int):
                    return target
                return target.fileno()

            p2cread = p2cwrite = None
            c2pread = c2pwrite = None
            errread = errwrite = None

            # PIPE is tested before the int check on purpose: the value
            # goes through == PIPE first, exactly as for the other streams.
            if stdin is not None:
                if stdin == PIPE:
                    p2cread, p2cwrite = os.pipe()
                else:
                    p2cread = _as_fd(stdin)

            if stdout is not None:
                if stdout == PIPE:
                    c2pread, c2pwrite = os.pipe()
                else:
                    c2pwrite = _as_fd(stdout)

            if stderr is not None:
                if stderr == PIPE:
                    errread, errwrite = os.pipe()
                elif stderr == STDOUT:
                    # stderr has one extra option: reuse whatever the
                    # child writes its stdout to.
                    errwrite = c2pwrite
                else:
                    errwrite = _as_fd(stderr)

            return (p2cread, p2cwrite,
                    c2pread, c2pwrite,
                    errread, errwrite)


Note 3:

The child process is launched by self._execute_child(), whose code is:

        def _execute_child(self, args, executable, preexec_fn, close_fds,
                           cwd, env, universal_newlines,
                           startupinfo, creationflags, shell,
                           p2cread, p2cwrite,
                           c2pread, c2pwrite,
                           errread, errwrite):
            """Execute program (POSIX version)

            Forks, wires the child's ends of the given descriptors onto
            fds 0, 1 and 2 inside the child, and execs ``executable``
            there.  A dedicated pipe carries a pickled exception from the
            child back to the parent if anything fails before or during
            the exec; the parent re-raises it.  On return ``self.pid``
            holds the pid returned by ``os.fork()``.
            """
            # Normalise args to a list.
            # types.StringTypes is (str, unicode) in Python 2, so a command
            # given as a single string becomes a one-element list.
            if isinstance(args, types.StringTypes):
                args = [args]
            else:
                # args was passed as a sequence
                args = list(args)

            # With shell=True the command is handed to /bin/sh -c.
            if shell:
                args = ["/bin/sh", "-c"] + args

            # Without an explicit executable the first list element is
            # used.  Note the consequence: with shell=False and
            # args='ls -l' the whole string 'ls -l' becomes the program
            # name, which will not be found.
            if executable is None:
                executable = args[0]

            # For transferring possible exec failure from child to parent
            # The first char specifies the exception type: 0 means
            # OSError, 1 means some other error.
            # NOTE(review): the sentence about "the first char" does not
            # match the code below, which writes a pickled exception
            # object into the pipe.
            # This pipe only reports failures to launch the executable;
            # once it is running, its own error output goes through
            # errwrite/errread.
            errpipe_read, errpipe_write = os.pipe()
            try:
                try:
                    # Presumably marks the write end close-on-exec, so that
                    # a successful exec closes it and the parent's read
                    # below returns an empty string -- verify against
                    # _set_cloexec_flag.
                    self._set_cloexec_flag(errpipe_write)

                    # Remember the collector state so it can be restored.
                    gc_was_enabled = gc.isenabled()
                    # Disable gc to avoid bug where gc -> file_dealloc ->
                    # write to stderr -> hang.  http://bugs.python.org/issue1336
                    gc.disable()
                    try:
                        self.pid = os.fork()  # fork the sub process
                    except:
                        # fork failed: restore the collector and propagate
                        if gc_was_enabled:
                            gc.enable()
                        raise
                    self._child_created = True
                    if self.pid == 0:
                        # Child
                        try:
                            # Close parent's pipe ends
                            if p2cwrite is not None:
                                os.close(p2cwrite)
                            if c2pread is not None:
                                os.close(c2pread)
                            if errread is not None:
                                os.close(errread)
                            os.close(errpipe_read)

                            # Dup fds for child: make the redirected
                            # descriptors the child's fd 0, 1 and 2.
                            if p2cread is not None:
                                os.dup2(p2cread, 0)
                            if c2pwrite is not None:
                                os.dup2(c2pwrite, 1)
                            if errwrite is not None:
                                os.dup2(errwrite, 2)

                            # Close pipe fds.  Make sure we don't close the same
                            # fd more than once, or standard fds.
                            if p2cread is not None and p2cread not in (0,):
                                os.close(p2cread)
                            if c2pwrite is not None and c2pwrite not in (p2cread, 1):
                                os.close(c2pwrite)
                            if errwrite is not None and errwrite not in (p2cread, c2pwrite, 2):
                                os.close(errwrite)

                            # Close all other fds, if asked for.  The error
                            # pipe is spared so failures can still be
                            # reported (refer to Note 3-1).
                            if close_fds:
                                self._close_fds(but=errpipe_write)

                            # Change the working directory if requested.
                            if cwd is not None:
                                os.chdir(cwd)

                            # Run the caller's hook, in the child, just
                            # before the exec.
                            if preexec_fn:
                                preexec_fn()

                            # Launch the executable.  On Unix exec replaces
                            # the program running in the current process, so
                            # the executable keeps the pid of this forked
                            # child.
                            if env is None:
                                os.execvp(executable, args)
                            else:
                                os.execvpe(executable, args, env)

                        except:
                            # Anything that went wrong above is sent to the
                            # parent through the error pipe.
                            exc_type, exc_value, tb = sys.exc_info()
                            # The traceback is formatted to text and attached
                            # to the exception as child_traceback, presumably
                            # so that it travels with the pickled exception
                            # and the parent can inspect it.
                            exc_lines = traceback.format_exception(exc_type,
                                                                   exc_value,
                                                                   tb)
                            exc_value.child_traceback = ''.join(exc_lines)
                            # Note: errpipe_write is used here, not errwrite.
                            os.write(errpipe_write, pickle.dumps(exc_value))

                        # This exitcode won't be reported to applications, so it
                        # really doesn't matter what we return.
                        # os._exit() ends the process immediately, so the
                        # child never runs the finally clauses below.
                        os._exit(255)

                    # Parent
                    if gc_was_enabled:
                        gc.enable()
                finally:
                    # be sure the FD is closed no matter what.  Only the
                    # parent gets here (the child has exec'ed or exited);
                    # the parent never writes to this pipe.
                    os.close(errpipe_write)

                # For each stream where both ends are set (a pipe was
                # created for it), close the child's end in the parent and
                # keep the parent's end.  Descriptors supplied by the caller
                # are left alone.
                if p2cread is not None and p2cwrite is not None:
                    os.close(p2cread)
                if c2pwrite is not None and c2pread is not None:
                    os.close(c2pwrite)
                if errwrite is not None and errread is not None:
                    os.close(errwrite)

                # Wait for exec to fail or succeed; possibly raising exception
                # Exception limited to 1M
                data = _eintr_retry_call(os.read, errpipe_read, 1048576)  # Refer to Note 3-2
            finally:
                # be sure the FD is closed no matter what.  This runs in the
                # parent; the child closed its own copy right after the fork.
                os.close(errpipe_read)

            # Non-empty data means the child reported a failure instead of
            # running the executable: wait for the child to finish, rebuild
            # the exception, close the parent's pipe ends and re-raise.
            if data != "":
                _eintr_retry_call(os.waitpid, self.pid, 0)
                # The pickle was produced by our own forked child.
                child_exception = pickle.loads(data)
                for fd in (p2cwrite, c2pread, errread):
                    if fd is not None:
                        os.close(fd)
                raise child_exception

Note 3-1:

Code of self._close_fds() is like this. And as you see, this function will close all the fds except 0, 1, 2 and the one indicated by "but"

        def _close_fds(self, but):
            """Close the descriptors in [3, MAXFD) except ``but``.

            Descriptors 0, 1 and 2 are never touched.
            """
            spared = but
            # Everything below the spared descriptor: [3, spared).
            os.closerange(3, spared)
            # Everything above it: [spared + 1, MAXFD).
            os.closerange(spared + 1, MAXFD)

Note 3-2:

Code of _eintr_retry_call() is like this. It wraps a call that may be interrupted by a signal, and its behavior is:

If a system call (if the func is a system call) is interrupted, then try again, till it finishes or other error occurs.

def _eintr_retry_call(func, *args):
    """Call ``func(*args)``, retrying for as long as it fails with EINTR.

    A blocking system call fails with ``errno.EINTR`` when a signal
    handler runs while the call is waiting.  That is not a real failure,
    so the call is simply issued again.

    Returns whatever ``func`` returns.

    Raises:
        OSError: any ``OSError`` from ``func`` whose errno is not
            ``EINTR`` is propagated unchanged.
    """
    while True:
        try:
            return func(*args)
        # "except ... as" works on Python 2.6+ and Python 3; the older
        # "except OSError, e" spelling is a syntax error on Python 3.
        except OSError as e:
            if e.errno == errno.EINTR:
                # Interrupted system call: try again.
                continue
            raise

Other functions except the initialization function are very easy:

self.poll()

    def poll(self):
        """Report the child's status without blocking.

        Delegates to ``_internal_poll()`` with its default arguments and
        returns whatever it returns (refer to Note 1-1).
        """
        status = self._internal_poll()
        return status
self.wait()
        def wait(self):
            """Block until the child has terminated and return its return code.

            If a return code has already been recorded it is returned
            immediately.  Otherwise ``os.waitpid`` is called for
            ``self.pid`` through ``_eintr_retry_call`` (refer to Note 3-2)
            and the resulting status is decoded by
            ``_handle_exitstatus()`` (refer to Note 1-1-1).
            """
            # Nothing to wait for once the return code is known.
            if self.returncode is not None:
                return self.returncode

            # The parent blocks here until the child finishes.
            _reaped_pid, raw_status = _eintr_retry_call(os.waitpid, self.pid, 0)
            self._handle_exitstatus(raw_status)
            return self.returncode
self.send_signal, self.terminate and self.kill():

        def send_signal(self, sig):
            """Deliver signal ``sig`` to the process ``self.pid`` via ``os.kill``.

            The constants for the available signals live in the ``signal``
            module.
            """
            target_pid = self.pid
            os.kill(target_pid, sig)

        def terminate(self):
            """Send SIGTERM to the process through ``send_signal()``."""
            self.send_signal(signal.SIGTERM)

        def kill(self):
            """Send SIGKILL to the process through ``send_signal()``."""
            self.send_signal(signal.SIGKILL)



To Be Continued.

0 0
原创粉丝点击