【containerd 1.0 源码分析】ctr run container 源码分析

来源:互联网 发布:网络诈骗彩票套路 编辑:程序博客网 时间:2024/06/06 13:20

命令:

ctr run -h
NAME:
   ctr run - run a container


USAGE:
   ctr run [command options] Image|RootFS ID [COMMAND] [ARG...]


OPTIONS:
   --tty, -t            allocate a TTY for the container
   --runtime value      runtime name (io.containerd.runtime.v1.linux, io.containerd.runtime.v1.windows, io.containerd.runtime.v1.com.vmware.linux) (default: "io.containerd.runtime.v1.linux")
   --readonly           set the containers filesystem as readonly
   --net-host           enable host networking for the container
   --mount value        specify additional container mount (ex: type=bind,src=/tmp,dest=/host,options=rbind:ro)
   --env value          specify additional container environment variables (i.e. FOO=bar)
   --label value        specify additional labels (foo=bar)
   --rm                 remove the container after running
   --checkpoint value   provide the checkpoint digest to restore the container
   --snapshotter value  Snapshotter name. Empty value stands for the daemon default value.
   --rootfs             Use custom rootfs that is not managed by containerd snapshotter.


    本文以 ctr run docker.io/library/redis:latest containerd-redis 命令为例子



一. 客户端 ctr run 命令分析


    1.1 cmd/ctr/run.go 中,根据 ctr run [command options] Image|RootFS ID [COMMAND] [ARG...],Image 和 ID 都是必填参数,缺少任意一个都会直接返回错误:
Action: func(context *cli.Context) error {       var (              err             error              checkpointIndex digest.Digest              ctx, cancel = appContext(context)              id          = context.Args().Get(1)              imageRef    = context.Args().First()              tty         = context.Bool("tty")       )       defer cancel()       if imageRef == "" {              return errors.New("image ref must be provided")       }       if id == "" {              return errors.New("container id must be provided")       }       if raw := context.String("checkpoint"); raw != "" {              if checkpointIndex, err = digest.Parse(raw); err != nil {                     return err              }       }

    1.2 如果指定了 --rm(即 context.Bool("rm") 为 true),则通过 defer 在命令结束时删除容器,并清理其快照(containerd.WithSnapshotCleanup):
Action: func(context *cli.Context) error {       if raw := context.String("checkpoint"); raw != "" {              if checkpointIndex, err = digest.Parse(raw); err != nil {                     return err              }       }       client, err := newClient(context)       if err != nil {              return err       }       container, err := newContainer(ctx, client, context)       if err != nil {              return err       }       if context.Bool("rm") {              defer container.Delete(ctx, containerd.WithSnapshotCleanup)       }       task, err := newTask(ctx, container, checkpointIndex, tty)       if err != nil {              return err       }       defer task.Delete(ctx)       statusC := make(chan uint32, 1)       go func() {              status, err := task.Wait(ctx)              if err != nil {                     logrus.WithError(err).Error("wait process")              }              statusC <- status       }()       var con console.Console       if tty {              con = console.Current()              defer con.Reset()              if err := con.SetRaw(); err != nil {                     return err              }       }       if err := task.Start(ctx); err != nil {              return err       }       if tty {              if err := handleConsoleResize(ctx, task, con); err != nil {                     logrus.WithError(err).Error("console resize")              }       } else {              sigc := forwardAllSignals(ctx, task)              defer stopCatch(sigc)       }       status := <-statusC       if _, err := task.Delete(ctx); err != nil {              return err       }       if status != 0 {              return cli.NewExitError("", int(status))       }       return nil},

    1.3 newContainer 直接调用 NewContainer,发送 GRPC 请求 Create 方法,服务端实现在第二章节讲解:
// NewContainer will create a new container in container with the provided id// the id must be unique within the namespacefunc (c *Client) NewContainer(ctx context.Context, id string, opts ...NewContainerOpts) (Container, error) {       container := containers.Container{              ID: id,              Runtime: containers.RuntimeInfo{                     Name: c.runtime,              },       }       for _, o := range opts {              if err := o(ctx, c, &container); err != nil {                     return nil, err              }       }       r, err := c.ContainerService().Create(ctx, container)       if err != nil {              return nil, err       }       return containerFromRecord(c, r), nil}

    1.4 newTask 主要调用 NewTask 函数实现的接口
func (c *container) NewTask(ctx context.Context, ioCreate IOCreation, opts ...NewTaskOpts) (Task, error) {       c.mu.Lock()       defer c.mu.Unlock()       i, err := ioCreate(c.c.ID)       if err != nil {              return nil, err       }       request := &tasks.CreateTaskRequest{              ContainerID: c.c.ID,              Terminal:    i.Terminal,              Stdin:       i.Stdin,              Stdout:      i.Stdout,              Stderr:      i.Stderr,       }       if c.c.RootFS != "" {              // get the rootfs from the snapshotter and add it to the request              mounts, err := c.client.SnapshotService(c.c.Snapshotter).Mounts(ctx, c.c.RootFS)              if err != nil {                     return nil, err              }              for _, m := range mounts {                     request.Rootfs = append(request.Rootfs, &types.Mount{                            Type:    m.Type,                            Source:  m.Source,                            Options: m.Options,                     })              }       }       var info TaskInfo       for _, o := range opts {              if err := o(ctx, c.client, &info); err != nil {                     return nil, err              }       }       if info.RootFS != nil {              for _, m := range info.RootFS {                     request.Rootfs = append(request.Rootfs, &types.Mount{                            Type:    m.Type,                            Source:  m.Source,                            Options: m.Options,                     })              }       }       if info.Options != nil {              any, err := typeurl.MarshalAny(info.Options)              if err != nil {                     return nil, err              }              request.Options = any       }       t := &task{              client: c.client,              io:     i,              id:     c.ID(),       }       if info.Checkpoint != nil {              request.Checkpoint = info.Checkpoint              // we need to defer the 
create call to start              t.deferred = request       } else {              response, err := c.client.TaskService().Create(ctx, request)              if err != nil {                     return nil, err              }              t.pid = response.Pid       }       return t, nil}

    c.client.TaskService().Create 会请求服务端对应的方法(services/tasks/service.go),在第三章讲解


二. 服务端 create 命令分析


   2.1 _Containers_Create_Handler 函数路径 api/services/containers/v1/containers.pb.go,服务端收到 GRPC 请求后,调用 Create 方法
// _Containers_Create_Handler handles the
// "/containerd.services.containers.v1.Containers/Create" method.
//
// It decodes the incoming message into a CreateContainerRequest using dec.
// When no interceptor is installed it calls srv.(ContainersServer).Create
// directly; otherwise it hands the request, the method info and a handler
// closure that performs the same Create call to the interceptor.
func _Containers_Create_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
	in := new(CreateContainerRequest)
	if err := dec(in); err != nil {
		return nil, err
	}
	// Fast path: no interceptor, invoke the service implementation directly.
	if interceptor == nil {
		return srv.(ContainersServer).Create(ctx, in)
	}
	info := &grpc.UnaryServerInfo{
		Server:     srv,
		FullMethod: "/containerd.services.containers.v1.Containers/Create",
	}
	// The interceptor decides when (and whether) to run the real call.
	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
		return srv.(ContainersServer).Create(ctx, req.(*CreateContainerRequest))
	}
	return interceptor(ctx, in, info, handler)
}

    2.2 services/containers/service.go 中 重要的函数 store.Create 为实现的接口
func (s *Service) Create(ctx context.Context, req *api.CreateContainerRequest) (*api.CreateContainerResponse, error) {       var resp api.CreateContainerResponse       if err := s.withStoreUpdate(ctx, func(ctx context.Context, store containers.Store) error {              container := containerFromProto(&req.Container)              created, err := store.Create(ctx, container)                resp.Container = containerToProto(&created)              return nil       }); err != nil {              return &resp, errdefs.ToGRPC(err)       }       if err := s.publisher.Publish(ctx, "/containers/create", &eventsapi.ContainerCreate{              ID:    resp.Container.ID,              Image: resp.Container.Image,              Runtime: &eventsapi.ContainerCreate_Runtime{                     Name:    resp.Container.Runtime.Name,                     Options: resp.Container.Runtime.Options,              },       }); err != nil {              return &resp, err       }       return &resp, nil}

    2.3 根据 withStore 函数可以得到 store 由 metadata.NewContainerStore 创建,路径 metadata/containers.go;containerStore 结构体包裹了一个 bolt 事务(*bolt.Tx),用来操作数据库
type containerStore struct {       tx *bolt.Tx}func NewContainerStore(tx *bolt.Tx) containers.Store {       return &containerStore{              tx: tx,       }}

    2.4 Create 函数中创建存放容器的 bucket:CreateBucket 成功说明此前不存在同 ID 的容器(已存在时返回 ErrAlreadyExists),随后 writeContainer 将 container 中的数据写入该 bucket。
func (s *containerStore) Create(ctx context.Context, container containers.Container) (containers.Container, error) {       namespace, err := namespaces.NamespaceRequired(ctx)       if err != nil {              return containers.Container{}, err       }       if err := identifiers.Validate(container.ID); err != nil {              return containers.Container{}, err       }       bkt, err := createContainersBucket(s.tx, namespace)       if err != nil {              return containers.Container{}, err       }       cbkt, err := bkt.CreateBucket([]byte(container.ID))       if err != nil {              if err == bolt.ErrBucketExists {                     err = errors.Wrapf(errdefs.ErrAlreadyExists, "content %q", container.ID)              }              return containers.Container{}, err       }       container.CreatedAt = time.Now().UTC()       container.UpdatedAt = container.CreatedAt       if err := writeContainer(cbkt, &container); err != nil {              return containers.Container{}, errors.Wrap(err, "failed to write container")       }       return container, nil}

    这部分主要是创建 bucket 操作数据库


三. 服务端 Task 分析


   3.1 路径 services/tasks/service.go 
func (s *Service) Create(ctx context.Context, r *api.CreateTaskRequest) (*api.CreateTaskResponse, error)

   3.1.1 getContainer 根据容器 ID 拿到容器信息
container, err := s.getContainer(ctx, r.ContainerID)if err != nil {       return nil, errdefs.ToGRPC(err)}

   3.1.2 创建 runtime 的参数

opts := runtime.CreateOpts{       Spec: container.Spec,       IO: runtime.IO{              Stdin:    r.Stdin,              Stdout:   r.Stdout,              Stderr:   r.Stderr,              Terminal: r.Terminal,       },       Checkpoint: checkpointPath,       Options:    r.Options,}for _, m := range r.Rootfs {       opts.Rootfs = append(opts.Rootfs, mount.Mount{              Type:    m.Type,              Source:  m.Source,              Options: m.Options,       })}

   3.1.3 getRuntime 根据 runtime 名称(container.Runtime.Name)返回对应的 Runtime 接口实现,接口定义如 3.1.3.1 所示:

runtime, err := s.getRuntime(container.Runtime.Name)if err != nil {       return nil, err}

// Service is the tasks service state.
type Service struct {
	// runtimes is keyed by a string and holds runtime.Runtime values.
	// NOTE(review): presumably keyed by runtime name, matching the
	// getRuntime(container.Runtime.Name) lookup — confirm.
	runtimes map[string]runtime.Runtime
	// db is the bolt database handle.
	db *bolt.DB
	// store is the content store.
	store content.Store
	// publisher is used to publish events.
	publisher events.Publisher
}
    

   3.1.3.1 Runtime 接口

// Runtime is responsible for the creation of containers for a certain platform,
// arch, or custom usage.
type Runtime interface {
	// ID of the runtime
	ID() string
	// Create creates a task with the provided id and options.
	Create(ctx context.Context, id string, opts CreateOpts) (Task, error)
	// Get returns a task.
	Get(context.Context, string) (Task, error)
	// Tasks returns all the current tasks for the runtime.
	// Any container runs at most one task at a time.
	Tasks(context.Context) ([]Task, error)
	// Delete removes the task in the runtime.
	Delete(context.Context, Task) (*Exit, error)
}

   3.1.4 runtime.Create 调用的为 linux/runtime.go 中 Create 方法,第四章节讲解

c, err := runtime.Create(ctx, r.ContainerID, opts)if err != nil {       return nil, errors.Wrap(err, "runtime create failed")}

 

四. 服务端 Runtime 分析


   4.1 路径 linux/runtime.go 函数 Create
func (r *Runtime) Create(ctx context.Context, id string, opts runtime.CreateOpts) (_ runtime.Task, err error)

    4.1.1 newBundle 根据传入的路径和 ID 创建目录文件,/var/lib/containerd/io.containerd.runtime.v1.linux/default
bundle, err := newBundle(filepath.Join(r.root, namespace), namespace, id, opts.Spec.Value, r.events)if err != nil {       return nil, err}

    4.1.2 NewShim 连接到 shim,返回连接到 shim 的客户端结构体(*client.Client)
s, err := bundle.NewShim(ctx, r.shim, r.address, r.remote, r.shimDebug, opts)if err != nil {       return nil, err}

   4.1.2.1 client.WithStart 会启动一个 containerd-shim 进程,命令大致为 containerd-shim --namespace default --address /run/containerd/containerd.sock;如果 remote 为 false,则改用 client.WithLocal
// NewShim connects to the shim managing the bundle and tasksfunc (b *bundle) NewShim(ctx context.Context, binary, grpcAddress string, remote, debug bool, createOpts runtime.CreateOpts) (*client.Client, error) {       opt := client.WithStart(binary, grpcAddress, debug)       if !remote {              opt = client.WithLocal(b.events)       }       var options runcopts.CreateOptions       if createOpts.Options != nil {              v, err := typeurl.UnmarshalAny(createOpts.Options)              if err != nil {                     return nil, err              }              options = *v.(*runcopts.CreateOptions)       }       return client.New(ctx, client.Config{              Address:    b.shimAddress(),              Path:       b.path,              Namespace:  b.namespace,              CgroupPath: options.ShimCgroup,       }, opt)}

   4.1.3 填充 CreateTaskRequest 结构体,发送 GRPC 给 shim 创建,Create 路径为 linux/shim/service.go,4.1.3.1 节讲解
sopts := &shim.CreateTaskRequest{       ID:         id,       Bundle:     bundle.path,       Runtime:    r.runtime,       Stdin:      opts.IO.Stdin,       Stdout:     opts.IO.Stdout,       Stderr:     opts.IO.Stderr,       Terminal:   opts.IO.Terminal,       Checkpoint: opts.Checkpoint,       Options:    opts.Options,}for _, m := range opts.Rootfs {       sopts.Rootfs = append(sopts.Rootfs, &types.Mount{              Type:    m.Type,              Source:  m.Source,              Options: m.Options,       })}if _, err = s.Create(ctx, sopts); err != nil {       return nil, errdefs.FromGRPC(err)}

   4.1.3.1 Create 函数中 newInitProcess 做的事情比较多,在第五章节讲解
func (s *Service) Create(ctx context.Context, r *shimapi.CreateTaskRequest) (*shimapi.CreateTaskResponse, error) {       process, err := newInitProcess(ctx, s.platform, s.path, s.namespace, r)              // save the main task id and bundle to the shim for additional requests       s.id = r.ID       s.bundle = r.Bundle       s.initProcess = process       pid := process.Pid()       s.processes[r.ID] = process       s.mu.Unlock()       cmd := &reaper.Cmd{              ExitCh: make(chan int, 1),       }       reaper.Default.Register(pid, cmd)       s.events <- &eventsapi.TaskCreate{              ContainerID: r.ID,              Bundle:      r.Bundle,              Rootfs:      r.Rootfs,              IO: &eventsapi.TaskIO{                     Stdin:    r.Stdin,                     Stdout:   r.Stdout,                     Stderr:   r.Stderr,                     Terminal: r.Terminal,              },              Checkpoint: r.Checkpoint,              Pid:        uint32(pid),       }       go s.waitExit(process, pid, cmd)       return &shimapi.CreateTaskResponse{              Pid: uint32(pid),       }, nil}


五. 服务端 newInitProcess 分析


   5.1 路径 linux/shim/init.go
rootfs := filepath.Join(path, "rootfs")
    path 大致为 /var/lib/containerd/io.containerd.runtime.v1.linux/default/${container-id}

   填充 Runc 结构体和 initProcess 结构体
runtime := &runc.Runc{       Command:      r.Runtime,       Log:          filepath.Join(path, "log.json"),       LogFormat:    runc.JSON,       PdeathSignal: syscall.SIGKILL,       Root:         filepath.Join(RuncRoot, namespace),}p := &initProcess{       id:       r.ID,       bundle:   r.Bundle,       runtime:  runtime,       platform: plat,       stdio: stdio{              stdin:    r.Stdin,              stdout:   r.Stdout,              stderr:   r.Stderr,              terminal: r.Terminal,       },       rootfs: rootfs,}

    5.2 NewPipeIO 为 runc 创建管道
if r.Terminal {       if socket, err = runc.NewConsoleSocket(filepath.Join(path, "pty.sock")); err != nil {              return nil, errors.Wrap(err, "failed to create OCI runtime console socket")       }       defer os.Remove(socket.Path())} else {       if io, err = runc.NewPipeIO(0, 0); err != nil {              return nil, errors.Wrap(err, "failed to create OCI runtime io pipes")       }       p.io = io}

   5.4 路径 github.com/containerd/go-runc/runc.go,如果第一次创建则执行 p.runtime.Create 函数,在第六章讲解
if err := p.runtime.Create(context, r.ID, r.Bundle, opts); err != nil {       return nil, p.runtimeError(err, "OCI runtime create failed")}



六. 服务端 runc 分析


    路径 vendor/github.com/containerd/go-runc/runc.go
   结构体 Runc,内容也不复杂
// Runc is the client to the runc cli
type Runc struct {
	// If command is empty, DefaultCommand is used
	Command string
	// NOTE(review): Root and Log presumably map to the --root and --log
	// arguments seen in the generated runc command line — confirm.
	Root          string
	Debug         bool
	Log           string
	LogFormat     Format
	PdeathSignal  syscall.Signal
	Setpgid       bool
	Criu          string
	SystemdCgroup string
}

   6.1 Create 创建一个新的容器;函数本身只返回 error,容器进程的 pid 写入 --pid-file 指定的文件。Monitor.Start 主要是创建文件描述符,启动进程等
    cmd 大致为 runc --root /run/containerd/runc/default --log /var/lib/containerd/io.containerd.runtime.v1.linux/default/${container-id}/log.json --pid-file /var/lib/containerd/io.containerd.runtime.v1.linux/default/${container-id}/init.pid,最终调用的是 runc 命令
// Create creates a new container and returns its pid if it was created successfullyfunc (r *Runc) Create(context context.Context, id, bundle string, opts *CreateOpts) error {       args := []string{"create", "--bundle", bundle}       if opts != nil {              oargs, err := opts.args()              if err != nil {                     return err              }              args = append(args, oargs...)       }       cmd := r.command(context, append(args, id)...)       if opts != nil && opts.IO != nil {              opts.Set(cmd)       }       cmd.ExtraFiles = opts.ExtraFiles       if cmd.Stdout == nil && cmd.Stderr == nil {              data, err := Monitor.CombinedOutput(cmd)              if err != nil {                     return fmt.Errorf("%s: %s", err, data)              }              return nil       }       if err := Monitor.Start(cmd); err != nil {              return err       }       if opts != nil && opts.IO != nil {              if c, ok := opts.IO.(StartCloser); ok {                     if err := c.CloseAfterStart(); err != nil {                            return err                     }              }       }       _, err := Monitor.Wait(cmd)       return err}



文件 /var/lib/containerd/io.containerd.runtime.v1.linux/default/${container-id}/config.json 的内容大致如下:

{
    "ociVersion": "1.0.0",
    "process": {
        "user": {
            "uid": 0,
            "gid": 0
        },
        "args": [
            "docker-entrypoint.sh",
            "redis-server"
        ],
        "env": [
            "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
            "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
            "GOSU_VERSION=1.10",
            "REDIS_VERSION=4.0.1",
            "REDIS_DOWNLOAD_URL=http://download.redis.io/releases/redis-4.0.1.tar.gz",
            "REDIS_DOWNLOAD_SHA=2049cd6ae9167f258705081a6ef23bb80b7eff9ff3d0d7481e89510f27457591"
        ],
        "cwd": "/data",
        "capabilities": {
            "bounding": [
                "CAP_CHOWN",
                "CAP_DAC_OVERRIDE",
                "CAP_FSETID",
                "CAP_FOWNER",
                "CAP_MKNOD",
                "CAP_NET_RAW",
                "CAP_SETGID",
                "CAP_SETUID",
                "CAP_SETFCAP",
                "CAP_SETPCAP",
                "CAP_NET_BIND_SERVICE",
                "CAP_SYS_CHROOT",
                "CAP_KILL",
                "CAP_AUDIT_WRITE"
            ],
            "effective": [
                "CAP_CHOWN",
                "CAP_DAC_OVERRIDE",
                "CAP_FSETID",
                "CAP_FOWNER",
                "CAP_MKNOD",
                "CAP_NET_RAW",
                "CAP_SETGID",
                "CAP_SETUID",
                "CAP_SETFCAP",
                "CAP_SETPCAP",
                "CAP_NET_BIND_SERVICE",
                "CAP_SYS_CHROOT",
                "CAP_KILL",
                "CAP_AUDIT_WRITE"
            ],
            "inheritable": [
                "CAP_CHOWN",
                "CAP_DAC_OVERRIDE",
                "CAP_FSETID",
                "CAP_FOWNER",
                "CAP_MKNOD",
                "CAP_NET_RAW",
                "CAP_SETGID",
                "CAP_SETUID",
                "CAP_SETFCAP",
                "CAP_SETPCAP",
                "CAP_NET_BIND_SERVICE",
                "CAP_SYS_CHROOT",
                "CAP_KILL",
                "CAP_AUDIT_WRITE"
            ],
            "permitted": [
                "CAP_CHOWN",
                "CAP_DAC_OVERRIDE",
                "CAP_FSETID",
                "CAP_FOWNER",
                "CAP_MKNOD",
                "CAP_NET_RAW",
                "CAP_SETGID",
                "CAP_SETUID",
                "CAP_SETFCAP",
                "CAP_SETPCAP",
                "CAP_NET_BIND_SERVICE",
                "CAP_SYS_CHROOT",
                "CAP_KILL",
                "CAP_AUDIT_WRITE"
            ]
        },
        "rlimits": [
            {
                "type": "RLIMIT_NOFILE",
                "hard": 1024,
                "soft": 1024
            }
        ],
        "noNewPrivileges": true
    },
    "root": {
        "path": "rootfs"
    },
    "mounts": [
        {
            "destination": "/proc",
            "type": "proc",
            "source": "proc"
        },
        {
            "destination": "/dev",
            "type": "tmpfs",
            "source": "tmpfs",
            "options": [
                "nosuid",
                "strictatime",
                "mode=755",
                "size=65536k"
            ]
        },
        {
            "destination": "/dev/pts",
            "type": "devpts",
            "source": "devpts",
            "options": [
                "nosuid",
                "noexec",
                "newinstance",
                "ptmxmode=0666",
                "mode=0620",
                "gid=5"
            ]
        },
        {
            "destination": "/dev/shm",
            "type": "tmpfs",
            "source": "shm",
            "options": [
                "nosuid",
                "noexec",
                "nodev",
                "mode=1777",
                "size=65536k"
            ]
        },
        {
            "destination": "/dev/mqueue",
            "type": "mqueue",
            "source": "mqueue",
            "options": [
                "nosuid",
                "noexec",
                "nodev"
            ]
        },
        {
            "destination": "/sys",
            "type": "sysfs",
            "source": "sysfs",
            "options": [
                "nosuid",
                "noexec",
                "nodev",
                "ro"
            ]
        },
        {
            "destination": "/run",
            "type": "tmpfs",
            "source": "tmpfs",
            "options": [
                "nosuid",
                "strictatime",
                "mode=755",
                "size=65536k"
            ]
        }
    ],
    "linux": {
        "resources": {
            "devices": [
                {
                    "allow": false,
                    "access": "rwm"
                }
            ]
        },
        "namespaces": [
            {
                "type": "pid"
            },
            {
                "type": "ipc"
            },
            {
                "type": "uts"
            },
            {
                "type": "mount"
            },
            {
                "type": "network"
            }
        ]
    }
}

原创粉丝点击