docker study --- containerd

来源:互联网 发布:淘宝卖家发物流怎么办 编辑:程序博客网 时间:2024/06/05 17:12

上次写了个containerd的源码分析,不知道为什么CSDN给删掉了,难道是审核不通过?......

重新写一篇,希望这次顺利

Containerd 是一个控制 runC 的守护进程,主要是为了性能和密度。Containerd 提供一个命令行客户端和 API,在一个机器上管理容器。Containerd 使用 runC 来根据 OCI 规范运行容器 。

containerd 是 docker “四大金刚”之一。它上接 docker daemon,下连 shim,负责把 docker daemon 传过来的 gRPC 消息转化为实际操作 runc 所要执行的命令。

containerd 最重要的就是 Supervisor 中的两个 go chan:tasks(chan Task)和 startTasks(chan *startTask)。

// Supervisor represents a container supervisor
type Supervisor struct {
	// stateDir is the directory on the system to store container runtime state information.
	stateDir string
	// name of the OCI compatible runtime used to execute containers
	runtime string
	// runtimeArgs and shim are forwarded to runtime.New as
	// ContainerOpts.RuntimeArgs and ContainerOpts.Shim when a container is started.
	runtimeArgs []string
	shim        string
	// containers is keyed by container ID (see start, which stores under t.ID).
	containers map[string]*containerInfo
	// startTasks is fed by (*Supervisor).start and drained by the worker
	// goroutines' Start loop.
	startTasks chan *startTask
	// we need a lock around the subscribers map only because additions and deletions from
	// the map are via the API so we cannot really control the concurrency
	subscriberLock sync.RWMutex
	subscribers    map[chan Event]struct{}
	machine        Machine
	// tasks is written by SendTask and consumed by the goroutine launched in Start.
	tasks     chan Task
	monitor   *Monitor
	eventLog  []Event
	eventLock sync.Mutex
	// timeout is handed to runtime.New as ContainerOpts.Timeout.
	timeout time.Duration
}

还有三个重要的go协程。

1. api server协程,主要负责将Task放入Task chan

2. supervisor协程,主要将Task从Task chan中取出放入startTask chan

3. supervisor worker 协程(十个),主要负责从 startTask chan 中取出 startTask,做相应的操作。


下边,以CreateContainer为例。

1. api server

当有个CreateContainer(GRPC)消息传入时会调用相应的函数

// Server API for API service
//
// APIServer lists the RPC handlers the containerd API server has to
// provide. Every unary method takes a context plus a request message and
// returns a response message or an error; Events is the only streaming
// method and writes to the supplied API_EventsServer instead.
type APIServer interface {
	GetServerVersion(context.Context, *GetServerVersionRequest) (*GetServerVersionResponse, error)
	CreateContainer(context.Context, *CreateContainerRequest) (*CreateContainerResponse, error)
	UpdateContainer(context.Context, *UpdateContainerRequest) (*UpdateContainerResponse, error)
	Signal(context.Context, *SignalRequest) (*SignalResponse, error)
	UpdateProcess(context.Context, *UpdateProcessRequest) (*UpdateProcessResponse, error)
	AddProcess(context.Context, *AddProcessRequest) (*AddProcessResponse, error)
	CreateCheckpoint(context.Context, *CreateCheckpointRequest) (*CreateCheckpointResponse, error)
	DeleteCheckpoint(context.Context, *DeleteCheckpointRequest) (*DeleteCheckpointResponse, error)
	ListCheckpoint(context.Context, *ListCheckpointRequest) (*ListCheckpointResponse, error)
	State(context.Context, *StateRequest) (*StateResponse, error)
	Events(*EventsRequest, API_EventsServer) error
	Stats(context.Context, *StatsRequest) (*StatsResponse, error)
}
func (s *apiServer) CreateContainer(ctx context.Context, c *types.CreateContainerRequest) (*types.CreateContainerResponse, error) {if c.BundlePath == "" {return nil, errors.New("empty bundle path")}// Maxx start tasks for create commande := &supervisor.StartTask{}e.ID = c.Ide.BundlePath = c.BundlePathe.Stdin = c.Stdine.Stdout = c.Stdoute.Stderr = c.Stderre.Labels = c.Labelse.NoPivotRoot = c.NoPivotRoote.Runtime = c.Runtimee.RuntimeArgs = c.RuntimeArgse.StartResponse = make(chan supervisor.StartResponse, 1)if c.Checkpoint != "" {e.CheckpointDir = c.CheckpointDire.Checkpoint = &runtime.Checkpoint{Name: c.Checkpoint,}}// sendtask put task  to task chan// D:\study\go\containerd-docker-v1.12.x\supervisor\supervisor.gos.sv.SendTask(e)if err := <-e.ErrorCh(); err != nil {return nil, err}r := <-e.StartResponseapiC, err := createAPIContainer(r.Container, false)if err != nil {return nil, err}return &types.CreateContainerResponse{Container: apiC,}, nil}
其中SendTask会将Task放入Task chan
// SendTask sends the provided event to the supervisor's main event loop.
//
// It increments TasksCounter and then writes evt to s.tasks; the send
// blocks until the channel can accept the value.
func (s *Supervisor) SendTask(evt Task) {
	TasksCounter.Inc(1)
	s.tasks <- evt
}

2. supervisor协程

这里要先说说containerd的开始流程。在containerd 的main函数中,start了supervisor和worker协程。

func main() {logrus.SetFormatter(&logrus.TextFormatter{TimestampFormat: time.RFC3339Nano})app := cli.NewApp()app.Name = "containerd"if containerd.GitCommit != "" {app.Version = fmt.Sprintf("%s commit: %s", containerd.Version, containerd.GitCommit)} else {app.Version = containerd.Version}app.Usage = usageapp.Flags = daemonFlagsapp.Before = func(context *cli.Context) error {setupDumpStacksTrap()if context.GlobalBool("debug") {logrus.SetLevel(logrus.DebugLevel)if context.GlobalDuration("metrics-interval") > 0 {if err := debugMetrics(context.GlobalDuration("metrics-interval"), context.GlobalString("graphite-address")); err != nil {return err}}}if p := context.GlobalString("pprof-address"); len(p) > 0 {pprof.Enable(p)}if err := checkLimits(); err != nil {return err}return nil}app.Action = func(context *cli.Context) {// Maxx  daemonif err := daemon(context); err != nil {logrus.Fatal(err)}}if err := app.Run(os.Args); err != nil {logrus.Fatal(err)}}
这里要关注daemon函数,这个函数对三个协程做了初始化。

func daemon(context *cli.Context) error {s := make(chan os.Signal, 2048)signal.Notify(s, syscall.SIGTERM, syscall.SIGINT)sv, err := supervisor.New(context.String("state-dir"),context.String("runtime"),context.String("shim"),context.StringSlice("runtime-args"),context.Duration("start-timeout"),context.Int("retain-count"))if err != nil {return err}// Maxx  start 10 worker threadwg := &sync.WaitGroup{}for i := 0; i < 10; i++ {wg.Add(1)w := supervisor.NewWorker(sv, wg)go w.Start()}// Maxx start GRPC server of containerdif err := sv.Start(); err != nil {return err}// Split the listen string of the form proto://addrlistenSpec := context.String("listen")listenParts := strings.SplitN(listenSpec, "://", 2)if len(listenParts) != 2 {return fmt.Errorf("bad listen address format %s, expected proto://address", listenSpec)}// Maxx  start Supervisor// definition of supervisor is underD:\study\go\containerd-docker-v1.12.x\supervisor\supervisor.go// start functionserver, err := startServer(listenParts[0], listenParts[1], sv)if err != nil {return err}for ss := range s {switch ss {default:logrus.Infof("stopping containerd after receiving %s", ss)server.Stop()os.Exit(0)}}return nil}
这里主要关注supervisor协程。worker协程稍后章节分析。

上边函数调用了supervisor的Start函数。

// Start is a non-blocking call that runs the supervisor for monitoring contianer processes and// executing new containers.//// This event loop is the only thing that is allowed to modify state of containers and processes// therefore it is save to do operations in the handlers that modify state of the system or// state of the Supervisorfunc (s *Supervisor) Start() error {logrus.WithFields(logrus.Fields{"stateDir":    s.stateDir,"runtime":     s.runtime,"runtimeArgs": s.runtimeArgs,"memory":      s.machine.Memory,"cpus":        s.machine.Cpus,}).Debug("containerd: supervisor running")go func() {for i := range s.tasks {// Maxx xxxs.handleTask(i)}}()return nil}
func (s *Supervisor) handleTask(i Task) {var err errorswitch t := i.(type) {case *AddProcessTask:err = s.addProcess(t)case *CreateCheckpointTask:err = s.createCheckpoint(t)case *DeleteCheckpointTask:err = s.deleteCheckpoint(t)case *StartTask:// Maxx   start come hereerr = s.start(t)case *DeleteTask:err = s.delete(t)case *ExitTask:err = s.exit(t)case *GetContainersTask:err = s.getContainers(t)case *SignalTask:err = s.signal(t)case *StatsTask:err = s.stats(t)case *UpdateTask:err = s.updateContainer(t)case *UpdateProcessTask:err = s.updateProcess(t)case *OOMTask:err = s.oom(t)default:err = ErrUnknownTask}if err != errDeferredResponse {i.ErrorCh() <- errclose(i.ErrorCh())}}
这里我们要关注的是start函数

func (s *Supervisor) start(t *StartTask) error {start := time.Now()rt := s.runtimertArgs := s.runtimeArgsif t.Runtime != "" {rt = t.RuntimertArgs = t.RuntimeArgs}container, err := runtime.New(runtime.ContainerOpts{Root:        s.stateDir,ID:          t.ID,Bundle:      t.BundlePath,Runtime:     rt,RuntimeArgs: rtArgs,Shim:        s.shim,Labels:      t.Labels,NoPivotRoot: t.NoPivotRoot,Timeout:     s.timeout,})if err != nil {return err}s.containers[t.ID] = &containerInfo{container: container,}ContainersCounter.Inc(1)task := &startTask{Err:           t.ErrorCh(),Container:     container,StartResponse: t.StartResponse,Stdin:         t.Stdin,Stdout:        t.Stdout,Stderr:        t.Stderr,}if t.Checkpoint != nil {task.CheckpointPath = filepath.Join(t.CheckpointDir, t.Checkpoint.Name)}//Maxx  构造一个新的startTask,并传递给startTasks channel// then go to D:\study\go\containerd-docker-v1.12.x\supervisor\worker.go// Supervisor.worker的Start方法中,读取startTasks channel,并调用runtime.Container接口的Start方法s.startTasks <- taskContainerCreateTimer.UpdateSince(start)return errDeferredResponse}

这里我们发现supervisor将Task转化后放入了startTask chan

3. supervisor worker

上面在daemon函数中调用了

// Excerpt from daemon: spawn ten workers. Each one is registered with the
// wait group before its Start loop is launched in its own goroutine.
for i := 0; i < 10; i++ {
	wg.Add(1)
	w := supervisor.NewWorker(sv, wg)
	go w.Start()
}
这起了10个worker协程。并调用了Start()方法

// Start runs a loop in charge of starting new containers
//
// NOTE: this is an abridged excerpt; the "......" markers stand for code
// the article left out, and the closing braces are not shown.
func (w *worker) Start() {
	// Mark this worker as finished on the wait group when the loop ends.
	defer w.wg.Done()
	for t := range w.s.startTasks {
		started := time.Now()
		// First Start: the container's Start method (runtime/container.go),
		// called with the checkpoint path and the task's stdio paths.
		process, err := t.Container.Start(t.CheckpointPath, runtime.NewStdio(t.Stdin, t.Stdout, t.Stderr))
		......
		if t.CheckpointPath == "" {
			// Second Start: the process's Start method (runtime/process.go),
			// which runs "<runtime> start <container id>" via os/exec.
			if err := process.Start(); err != nil {
				......
			}
这里有两个Start()函数, 我们分别看。

1. t.Container.Start

func (c *container) Start(checkpointPath string, s Stdio) (Process, error) {processRoot := filepath.Join(c.root, c.id, InitProcessID)if err := os.Mkdir(processRoot, 0755); err != nil {return nil, err}// Maxx  start shim process cmd/*docker-containerd-shim 817c43b3f5794d0e5dfdb92acf60fe7653b3efc33a4388733d357d00a8d8ae1a /var/run/docker/libcontainerd/817c43b3f5794d0e5dfdb92acf60fe7653b3efc33a4388733d357d00a8d8ae1a docker-runc*/cmd := exec.Command(c.shim,c.id, c.bundle, c.runtime,)cmd.Dir = processRootcmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true,}spec, err := c.readSpec()if err != nil {return nil, err}config := &processConfig{checkpoint:  checkpointPath,root:        processRoot,id:          InitProcessID,c:           c,stdio:       s,spec:        spec,processSpec: specs.ProcessSpec(spec.Process),}// Maxx p, err := newProcess(config)if err != nil {return nil, err}// Max run the cmd, now containerd end,  turn to runcif err := c.createCmd(InitProcessID, cmd, p); err != nil {return nil, err}return p, nil}


createCmd() 调用 cmd.Start()(Go 标准库 os/exec 的方法)启动 docker-containerd-shim 进程,随后通过 waitForCreate() 等待 pid 文件写出。

// createCmd starts cmd for process p, waits for the process to be created
// (waitForCreate), and on success registers p in c.processes under pid.
//
// If cmd cannot be started, p.cmdDoneCh is closed and the error is
// returned; a "not found" error from os/exec is rewritten to say that the
// shim is not installed.
func (c *container) createCmd(pid string, cmd *exec.Cmd, p *process) error {
	p.cmd = cmd
	// Launch the command (os/exec Cmd.Start).
	if err := cmd.Start(); err != nil {
		close(p.cmdDoneCh)
		if exErr, ok := err.(*exec.Error); ok {
			if exErr.Err == exec.ErrNotFound || exErr.Err == os.ErrNotExist {
				return fmt.Errorf("%s not installed on system", c.shim)
			}
		}
		return err
	}
	// We need the pid file to have been written to run
	// The goroutine is launched from a defer, i.e. only when createCmd
	// returns, which is after waitForCreate below has finished.
	defer func() {
		go func() {
			// Reap the command; a nil error is recorded as success.
			err := p.cmd.Wait()
			if err == nil {
				p.cmdSuccess = true
			}

			if same, err := p.isSameProcess(); same && p.pid > 0 {
				// The process changed its PR_SET_PDEATHSIG, so force
				// kill it
				logrus.Infof("containerd: %s:%s (pid %v) has become an orphan, killing it", p.container.id, p.id, p.pid)
				err = unix.Kill(p.pid, syscall.SIGKILL)
				if err != nil && err != syscall.ESRCH {
					logrus.Errorf("containerd: unable to SIGKILL %s:%s (pid %v): %v", p.container.id, p.id, p.pid, err)
				} else {
					// Poll with signal 0 until the kill call fails, i.e.
					// until the pid can no longer be signalled.
					for {
						err = unix.Kill(p.pid, 0)
						if err != nil {
							break
						}
						time.Sleep(5 * time.Millisecond)
					}
				}
			}
			// Signal that the command has finished.
			close(p.cmdDoneCh)
		}()
	}()
	if err := c.waitForCreate(p, cmd); err != nil {
		return err
	}
	c.processes[pid] = p
	return nil
}
waitForCreate()

func (c *container) waitForCreate(p *process, cmd *exec.Cmd) error {wc := make(chan error, 1)go func() {for {if _, err := p.getPidFromFile(); err != nil {if os.IsNotExist(err) || err == errInvalidPidInt {alive, err := isAlive(cmd)if err != nil {wc <- errreturn}if !alive {// runc could have failed to run the container so lets get the error// out of the logs or the shim could have encountered an errormessages, err := readLogMessages(filepath.Join(p.root, "shim-log.json"))if err != nil {wc <- errreturn}for _, m := range messages {if m.Level == "error" {wc <- fmt.Errorf("shim error: %v", m.Msg)return}}// no errors reported back from shim, check for runc/runtime errorsmessages, err = readLogMessages(filepath.Join(p.root, "log.json"))if err != nil {if os.IsNotExist(err) {err = ErrContainerNotStarted}wc <- errreturn}for _, m := range messages {if m.Level == "error" {wc <- fmt.Errorf("oci runtime error: %v", m.Msg)return}}wc <- ErrContainerNotStartedreturn}time.Sleep(15 * time.Millisecond)continue}wc <- errreturn}// the pid file was read successfullywc <- nilreturn}}()select {case err := <-wc:if err != nil {return err}err = p.saveStartTime()if err != nil {logrus.Warnf("containerd: unable to save %s:%s starttime: %v", p.container.id, p.id, err)}return nilcase <-time.After(c.timeout):cmd.Process.Kill()cmd.Wait()return ErrContainerStartTimeout}}


// Start starts the specified command but does not wait for it to complete.
//
// The Wait method will return the exit code and release associated resources
// once the command exits.
//
// NOTE: this is the Go standard library's os/exec implementation, quoted
// by the article with the author's annotations.
func (c *Cmd) Start() error {
	// Every early-return error path closes both descriptor sets first.
	if c.lookPathErr != nil {
		c.closeDescriptors(c.closeAfterStart)
		c.closeDescriptors(c.closeAfterWait)
		return c.lookPathErr
	}
	if runtime.GOOS == "windows" {
		lp, err := lookExtensions(c.Path, c.Dir)
		if err != nil {
			c.closeDescriptors(c.closeAfterStart)
			c.closeDescriptors(c.closeAfterWait)
			return err
		}
		c.Path = lp
	}
	if c.Process != nil {
		return errors.New("exec: already started")
	}
	// Do not start at all if the context is already done.
	if c.ctx != nil {
		select {
		case <-c.ctx.Done():
			c.closeDescriptors(c.closeAfterStart)
			c.closeDescriptors(c.closeAfterWait)
			return c.ctx.Err()
		default:
		}
	}

	// Set up stdin, stdout and stderr, in that order, followed by ExtraFiles.
	type F func(*Cmd) (*os.File, error)
	for _, setupFd := range []F{(*Cmd).stdin, (*Cmd).stdout, (*Cmd).stderr} {
		fd, err := setupFd(c)
		if err != nil {
			c.closeDescriptors(c.closeAfterStart)
			c.closeDescriptors(c.closeAfterWait)
			return err
		}
		c.childFiles = append(c.childFiles, fd)
	}
	c.childFiles = append(c.childFiles, c.ExtraFiles...)

	var err error
	// Author's note (Maxx): this is where the actual OS process is started;
	// for the container's init process see libcontainer/process_linux.go in runc.
	c.Process, err = os.StartProcess(c.Path, c.argv(), &os.ProcAttr{
		Dir:   c.Dir,
		Files: c.childFiles,
		Env:   c.envv(),
		// Author's note (Maxx): SysProcAttr carries the attributes runC
		// needs enabled, such as the namespaces.
		Sys:   c.SysProcAttr,
	})
	if err != nil {
		c.closeDescriptors(c.closeAfterStart)
		c.closeDescriptors(c.closeAfterWait)
		return err
	}

	c.closeDescriptors(c.closeAfterStart)

	// Run each registered helper function in its own goroutine; errch is
	// sized so that none of them blocks when reporting its result.
	c.errch = make(chan error, len(c.goroutine))
	for _, fn := range c.goroutine {
		go func(fn func() error) {
			c.errch <- fn()
		}(fn)
	}

	// With a context, kill the process when the context is done, unless
	// waitDone is signalled first.
	if c.ctx != nil {
		c.waitDone = make(chan struct{})
		go func() {
			select {
			case <-c.ctx.Done():
				c.Process.Kill()
			case <-c.waitDone:
			}
		}()
	}

	return nil
}



2.  process.Start

// Start unblocks the associated container init process.// This should only be called on the process with ID "init"func (p *process) Start() error {if p.ID() == InitProcessID {var (errC = make(chan error, 1)args = append(p.container.runtimeArgs, "start", p.container.id)cmd  = exec.Command(p.container.runtime, args...))go func() {out, err := cmd.CombinedOutput()if err != nil {errC <- fmt.Errorf("%s: %q", err.Error(), out)}errC <- nil}()select {case err := <-errC:if err != nil {return err}case <-p.cmdDoneCh:if !p.cmdSuccess {if cmd.Process != nil {cmd.Process.Kill()}cmd.Wait()return ErrShimExited}err := <-errCif err != nil {return err}}}return nil}





图片源自网络


0 0
原创粉丝点击