libphenom Study Notes

References:

  • libphenom documentation
  • How-does-libphenom-work

Reference counting

Source: include/phenom/refcnt.h

    typedef int ph_refcnt_t;

    void ph_refcnt_add(ph_refcnt_t *ref)

    // Returns true if we just released the final reference
    bool ph_refcnt_del(ph_refcnt_t *ref)

Reference counts manage an object's lifetime:

    void ph_string_delref(ph_string_t *str)
    {
      if (!ph_refcnt_del(&str->ref)) {
        return;
      }
      if (str->mt >= 0) {
        ph_mem_free(str->mt, str->buf);
        str->mt = PH_MEMTYPE_INVALID;
      }
      if (str->slice) {
        ph_string_delref(str->slice);
        str->slice = 0;
      }
      str->buf = 0;
      if (!str->onstack) {
        ph_mem_free(mt_string, str);
      }
    }

Counter

Source: corelib/counter.c; include/phenom/counter.h; tests/counter.c; corelib/debug_console.c

Counters track how often things happen. They are used in practice by the memory and job subsystems.

A scope is the notion of a set of counters that logically belong to the same group.
The first step in using counters is to create a scope.
When defining a scope you must specify the maximum number of counters that can be registered in it; these are its slots.
Scopes can have parent/child relationships with one another.

There are only two scenarios in which you should create a block (see the sketch below):

  • when you need to update a counter frequently within a single thread;
  • when you update several counters within one thread and want the operation to be as fast as possible.
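
A sketch of that workflow, based on the counter API declared in include/phenom/counter.h (treat the exact signatures, including ph_counter_block_delref, as approximations to verify against the header):

    #include "phenom/counter.h"

    static ph_counter_scope_t *scope;
    static uint8_t slot_sent, slot_err;

    static void my_counters_init(void)
    {
      // a top-level scope (NULL parent) with room for 2 counter slots
      scope = ph_counter_scope_define(NULL, "myapp", 2);
      slot_sent = ph_counter_scope_register_counter(scope, "sent");
      slot_err = ph_counter_scope_register_counter(scope, "errors");
    }

    static void hot_path(void)
    {
      // a block caches this thread's view of the scope, so repeated
      // updates avoid re-resolving the scope on every increment
      ph_counter_block_t *block = ph_counter_block_open(scope);

      ph_counter_block_add(block, slot_sent, 1);
      ph_counter_block_add(block, slot_err, 1);
      ph_counter_block_delref(block);
    }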

With the debug console enabled, you can dump the system's counters.

    ~$> echo counters | nc -UC /tmp/phenom-debug-console
                iosched/dispatched             5144
                iosched/timer_busy                0
               iosched/timer_ticks             5035
        memory.ares.channel/allocs                1
         memory.ares.channel/bytes              104
         memory.ares.channel/frees                0
           memory.ares.channel/oom                0

The top-level scopes above are memory and iosched. memory's child scope is ares, and ares's child scope is channel.
channel has 4 slots recording 4 counters, named allocs, bytes, frees and oom,
whose values are 1, 104, 0 and 0 respectively.

memory

Source: corelib/memory.c; include/phenom/memory.h; tests/memory.c; corelib/debug_console.c

A memory allocator built on top of the counter subsystem.

New memtypes are registered through the following two functions.

    ph_memtype_t ph_memtype_register(const ph_memtype_def_t *def);

    ph_memtype_t ph_memtype_register_block(
        uint8_t num_types,
        const ph_memtype_def_t *defs,
        ph_memtype_t *types);

Operations supported on a memtype: malloc, realloc, free.

    void *ph_mem_alloc(ph_memtype_t memtype)
    void *ph_mem_alloc_size(ph_memtype_t memtype, uint64_t size)
    void *ph_mem_realloc(ph_memtype_t memtype, void *ptr, uint64_t size)
    void  ph_mem_free(ph_memtype_t memtype, void *ptr)

Allocation statistics for a memtype are available through the following function.

    void ph_mem_stat(ph_memtype_t memtype, ph_mem_stats_t *stats);

    struct ph_mem_stats {
      /* the definition */
      const ph_memtype_def_t *def;
      /* current amount of allocated memory in bytes */
      uint64_t bytes;
      /* total number of out-of-memory events (allocation failures) */
      uint64_t oom;
      /* total number of successful allocation events */
      uint64_t allocs;
      /* total number of calls to free */
      uint64_t frees;
      /* total number of calls to realloc (that are not themselves
       * equivalent to an alloc or free) */
      uint64_t reallocs;
    };
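
A minimal usage sketch; the ph_memtype_def_t layout (facility, name, item size, flags) follows the defs[] array quoted in the buffers section below, and the "myapp"/"widget" names are made up:

    #include "phenom/memory.h"

    // facility, name, item size (0 = variable size), flags
    static ph_memtype_def_t my_def = { "myapp", "widget", 0, PH_MEM_FLAGS_ZERO };
    static ph_memtype_t mt_widget;

    static void widget_demo(void)
    {
      ph_mem_stats_t stats;
      char *p;

      mt_widget = ph_memtype_register(&my_def);

      p = ph_mem_alloc_size(mt_widget, 128);   // counted against mt_widget
      if (!p) {
        return;  // this failure would bump the memtype's oom counter
      }
      ph_mem_free(mt_widget, p);

      ph_mem_stat(mt_widget, &stats);          // stats.allocs == 1, stats.frees == 1
    }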

With the debug console enabled, you can dump memory usage (very cool).

    $> echo memory | nc -UC /tmp/phenom-debug-console
                    WHAT     BYTES       OOM    ALLOCS     FREES   REALLOC
         threadpool/pool       832         0         1         0         0
      threadpool/ringbuf      8480         0         2         0         0
         hashtable/table      3136         0         3         0         0
               hook/hook         8         0         1         0         0
               hook/head         0         0         0         0         0
             hook/string        19         0         1         0         0
              hook/unreg         0         0         0         0         0
           stream/stream       272         0         2         0         0
           buffer/object       120         0         3         0         0
               buffer/8k     16384         0         2         0         0
              buffer/16k         0         0         0         0         0
              buffer/32k         0         0         0         0         0
              buffer/64k         0         0         0         0         0
            buffer/vsize         0         0         0         0         0
            buffer/queue        48         0         2         0         0
        buffer/queue_ent        64         0         2         0         0

strings

Source: corelib/string.c; include/phenom/string.h; tests/string.c

Design goals: http://facebook.github.io/libphenom/#string

Implementation

    typedef struct ph_string ph_string_t;

    struct ph_string {
      ph_refcnt_t ref;        // reference count
      ph_memtype_t mt;
      uint32_t len, alloc;    // bytes used, bytes allocated
      char *buf;              // points to the actual storage
      ph_string_t *slice;
      bool onstack;           // whether the struct lives on the stack
    };

The mt field encodes the flavor: a negative value means a stack-based growable string, a positive value a heap-allocated growable one.

    ph_result_t ph_string_append_buf(ph_string_t *str,
        const char *buf, uint32_t len)
    {
      if (len + str->len > str->alloc) {
        // Not enough room
        if (str->mt == PH_STRING_STATIC) {
          // Just clamp to the available space
          len = str->alloc - str->len;
        } else {
          // Grow it
          uint32_t nsize = ph_power_2(str->len + len);
          char *nbuf;

          // Negative memtypes encode the desired memtype as the negative
          // value.  Allocate a buffer from scratch using the desired memtype
          if (str->mt < 0) {
            nbuf = ph_mem_alloc_size(-str->mt, nsize);
          } else {
            nbuf = ph_mem_realloc(str->mt, str->buf, nsize);
          }
          if (nbuf == NULL) {
            return PH_NOMEM;
          }
          if (str->mt < 0) {
            // Promote from static growable to heap allocated growable
            memcpy(nbuf, str->buf, str->len);
            str->mt = -str->mt;
          }
          str->buf = nbuf;
          str->alloc = nsize;
        }
      }
      memcpy(str->buf + str->len, buf, len);
      str->len += len;
      return PH_OK;
    }

Creating a slice

    ph_string_t *ph_string_make_slice(ph_string_t *str,
        uint32_t start, uint32_t len)
    {
      ph_string_t *slice;

      if (start == 0 && len == str->len) {
        ph_string_addref(str);
        return str;
      }

      slice = ph_mem_alloc(mt_string);
      if (!slice) {
        return NULL;
      }
      ph_string_init_slice(slice, str, start, len);
      return slice;
    }
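
Tying the pieces together, a sketch (ph_string_make_cstr is an assumed constructor name; verify it against include/phenom/string.h, the rest is quoted above):

    #include "phenom/string.h"

    static void string_demo(ph_memtype_t mt)
    {
      // heap-allocated growable string (constructor name is an assumption)
      ph_string_t *str = ph_string_make_cstr(mt, "hello");
      ph_string_t *slice;

      ph_string_append_buf(str, " world", 6);   // may grow and reallocate buf

      slice = ph_string_make_slice(str, 0, 5);  // "hello", sharing str's storage
      ph_string_delref(slice);                  // also drops the ref slice holds on str
      ph_string_delref(str);
    }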

Submodule initialization

For example, memory.c contains the following directive

    PH_LIBRARY_INIT_PRI(memory_init, memory_destroy, 3)

include/phenom/defs.h defines

    void ph_library_init_register(struct ph_library_init_entry *ent);

    #define PH_LIBRARY_INIT_PRI(initfn, finifn, pri) \
      static __attribute__((constructor)) \
      void ph_defs_gen_symbol(ph__lib__init__)(void) { \
        static struct ph_library_init_entry ent = { \
          __FILE__, __LINE__, pri, initfn, finifn, 0 \
        }; \
        ph_library_init_register(&ent); \
      }

__attribute__((constructor)) makes the function run automatically before main starts;
see http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html.
This is how memory_init and memory_destroy get registered.

Every program that uses libphenom is required to call ph_library_init first; it runs every registered init function.

    for (i = 0; i < num_init_ents; i++) {
      struct ph_library_init_entry *ent = init_funcs[i];

      if (ent->init) {
        ent->init();
      }
    }
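
A submodule therefore only needs something like the following to hook into the startup sequence (a sketch using the macro quoted above; the init/fini bodies are hypothetical, and memory.c's priority of 3 is reused):

    #include "phenom/defs.h"

    static void my_module_init(void)
    {
      // register this module's memtypes, counters, etc.
    }

    static void my_module_destroy(void)
    {
      // release whatever my_module_init acquired
    }

    PH_LIBRARY_INIT_PRI(my_module_init, my_module_destroy, 3)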

Stream

Source: corelib/streams directory; include/phenom/stream.h; tests/stream.c

libPhenom provides a portable layer over streaming IO.
CSAPP explains why standard IO cannot be used on sockets.
Streams support socket, ssl, fd and string backends.

Implementation

    /** Represents a stream
     *
     * Streams maintain a buffer for read/write operations.
     */
    struct ph_stream {
      const struct ph_stream_funcs *funcs;
      void *cookie;
      unsigned flags;
      pthread_mutex_t lock;
      // if data is in the read buffer, these are non-NULL
      unsigned char *rpos, *rend;
      // if data is in the write buffer, these are non-NULL
      unsigned char *wpos, *wend;
      unsigned char *wbase;
      // associated buffer.  It can be either used in read mode
      // or write mode, but not both
      unsigned char *buf;
      uint32_t bufsize;
      int last_err;
      ph_iomask_t need_mask;
    };

    /** Defines a stream implementation.
     *
     * If any of these return false, it indicates an error.
     * The implementation must set stm->last_err to the corresponding
     * errno value in that case (and only in the failure case).
     */
    struct ph_stream_funcs {
      bool (*close)(ph_stream_t *stm);
      bool (*readv)(ph_stream_t *stm, const struct iovec *iov,
          int iovcnt, uint64_t *nread);
      bool (*writev)(ph_stream_t *stm, const struct iovec *iov,
          int iovcnt, uint64_t *nwrote);
      bool (*seek)(ph_stream_t *stm, int64_t delta,
          int whence, uint64_t *newpos);
    };

Reads and writes share a single buffer. Different stream types are supported by supplying a struct ph_stream_funcs implementation.
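
As an illustration, a do-nothing sink stream can be sketched by filling in the funcs table; ph_stm_make (used the same way by the sock code later in these notes) would wrap it in a ph_stream_t. This is a sketch against the declarations above, not code from libphenom:

    #include "phenom/stream.h"
    #include <errno.h>

    // a /dev/null-style sink: reads report EOF, writes are discarded
    static bool null_close(ph_stream_t *stm) {
      (void)stm;
      return true;
    }

    static bool null_readv(ph_stream_t *stm, const struct iovec *iov,
        int iovcnt, uint64_t *nread) {
      (void)stm; (void)iov; (void)iovcnt;
      *nread = 0;               // 0 bytes read: EOF
      return true;
    }

    static bool null_writev(ph_stream_t *stm, const struct iovec *iov,
        int iovcnt, uint64_t *nwrote) {
      uint64_t total = 0;
      int i;

      (void)stm;
      for (i = 0; i < iovcnt; i++) {
        total += iov[i].iov_len; // pretend every byte was written
      }
      *nwrote = total;
      return true;
    }

    static bool null_seek(ph_stream_t *stm, int64_t delta,
        int whence, uint64_t *newpos) {
      (void)delta; (void)whence; (void)newpos;
      stm->last_err = ESPIPE;    // not seekable; set last_err per the contract
      return false;
    }

    static struct ph_stream_funcs null_funcs = {
      .close = null_close,
      .readv = null_readv,
      .writev = null_writev,
      .seek = null_seek,
    };

    // ph_stream_t *stm = ph_stm_make(&null_funcs, NULL, 0, 0);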

buffers

Source: corelib/buf.c; include/phenom/buffer.h; tests/buf.c

Design goals: http://facebook.github.io/libphenom/index.html#buffer

ph_buf_t is the underlying implementation of ph_bufq_t and is not used on its own.

    struct ph_buf {
      ph_refcnt_t ref;
      ph_buf_t *slice;
      uint8_t *buf;
      uint64_t size;
      ph_memtype_t memtype;
    };

    ph_buf_t *ph_buf_new(uint64_t size);
    ph_buf_t *ph_buf_slice(ph_buf_t *buf, uint64_t start, uint64_t len);

ph_buf_new creates a new buffer whose size is chosen by select_size, mainly rounding up to 8192, 16k, 32k and so on.
ph_buf_slice creates a slice; a slice allocates no memory of its own.
Special case: when start == 0 and len equals the buffer's length, it simply calls ph_buf_addref(buf).

Each kind of allocation in the buf subsystem gets its own memtype.

    static ph_memtype_def_t defs[] = {
      { "buffer", "object", sizeof(ph_buf_t), PH_MEM_FLAGS_ZERO },
      { "buffer", "8k", 8*1024, 0 },
      { "buffer", "16k", 16*1024, 0 },
      { "buffer", "32k", 32*1024, 0 },
      { "buffer", "64k", 64*1024, 0 },
      { "buffer", "vsize", 0, 0 },
      { "buffer", "queue", sizeof(ph_bufq_t), PH_MEM_FLAGS_ZERO },
      { "buffer", "queue_ent", sizeof(struct ph_bufq_ent), PH_MEM_FLAGS_ZERO },
    };

ph_bufq_t serves as the user-level buffer for sockets.

    struct ph_bufq_ent {
      PH_STAILQ_ENTRY(ph_bufq_ent) ent;
      ph_buf_t *buf;
      // Offset into the buf of the data that is yet to be consumed
      uint64_t rpos;
      // Offset at which to append further data
      uint64_t wpos;
    };

    struct ph_bufq {
      PH_STAILQ_HEAD(bufqhead, ph_bufq_ent) fifo;
      // Maximum amount of storage to allow
      uint64_t max_size;   // apparently unused at the moment? 20131114
    };

    ph_bufq_t *ph_bufq_new(uint64_t max_size);
    ph_result_t ph_bufq_append(ph_bufq_t *q, const void *buf, uint64_t len,
        uint64_t *added_bytes);
    ph_buf_t *ph_bufq_consume_bytes(ph_bufq_t *q, uint64_t len);
    ph_buf_t *ph_bufq_consume_record(ph_bufq_t *q, const char *delim,
        uint32_t delim_len);

ph_bufq_new creates a FIFO of fixed-size buffers; by default it places one 8192-byte buffer in the FIFO.
ph_bufq_append inserts data into a ph_bufq_t; if the last buffer lacks capacity, a new buffer is created and appended to the FIFO.
ph_bufq_consume_bytes reads data out of a ph_bufq_t; gc_bufq releases the consumed storage, and the returned ph_buf_t is freshly created.
ph_bufq_consume_record reads data up to a given delimiter, e.g. up to "\r\n"; it calls find_record, an implementation that demands patience.
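
A usage sketch of the queue functions quoted above (ph_buf_mem, ph_buf_len and ph_buf_delref are assumed accessor/refcount names):

    #include "phenom/buffer.h"
    #include <stdio.h>

    static void bufq_demo(void)
    {
      ph_bufq_t *q = ph_bufq_new(0);   // max_size (noted above as apparently unused)
      uint64_t added;
      ph_buf_t *line;

      ph_bufq_append(q, "PING\r\nleftover", 14, &added);

      // consume one "\r\n"-delimited record; the leftover bytes stay queued
      line = ph_bufq_consume_record(q, "\r\n", 2);
      if (line) {
        printf("%.*s\n", (int)ph_buf_len(line), (char*)ph_buf_mem(line));
        ph_buf_delref(line);
      }
    }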

Json

Source: include/phenom/json.h; corelib/variant/ directory

Provides JSON encoding and decoding.

Configuration

Source: include/phenom/configuration.h; corelib/config.c

At program startup a global configuration file (JSON format) can adjust some of the program's behavior.
The file can be specified via ph_config_load_config_file or getenv("PHENOM_CONFIG_FILE").
For example, in job.c the following parameter controls the sleep time

    int max_sleep = ph_config_query_int("$.nbio.max_sleep", 5000);

Applications are advised to keep their own configuration under the "$.app." path.
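
For example, an application knob might look like this (the "$.app.workers" path is hypothetical):

    // falls back to 4 when the config file does not define $.app.workers
    int workers = ph_config_query_int("$.app.workers", 4);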

timer wheel

Source: include/phenom/timerwheel.h; corelib/timerwheel.c

The timer wheel is a mechanism for implementing timers; the concept comes from "Hashed and Hierarchical Timing Wheels".
It is used to manage large numbers of timers; the Linux kernel uses the same scheme.

[Figure: a timer wheel drawn as a clock face]

The working principle of a timer wheel is analogous to a clock, as in the figure above: a pointer advances in one direction at a fixed frequency, and each jump is called a tick.
A timer wheel is thus characterized by 3 important parameters: ticksPerWheel (ticks per revolution), tickDuration (the duration of one tick)
and timeUnit (the time unit). For example, with ticksPerWheel=60, tickDuration=1 and timeUnit=seconds, it behaves exactly like the second hand of a real clock.

Implementation

    PH_LIST_HEAD(              // doubly-linked circular list head, see phenom/queue.h
        ph_timerwheel_list,
        ph_timerwheel_timer);

    struct ph_timerwheel_timer {
      PH_LIST_ENTRY(ph_timerwheel_timer) t;
      struct ph_timerwheel_list *list;
      struct timeval due;
      int enable;
    #define PH_TIMER_DISABLED    0
    #define PH_TIMER_ENABLED     1
    #define PH_TIMER_LOCKED      2
    };

    #define PHENOM_WHEEL_BITS 8
    #define PHENOM_WHEEL_SIZE (1 << PHENOM_WHEEL_BITS)     // 256

    struct ph_timerwheel {
      struct timeval next_run;   // actual time of the next tick
      uint32_t tick_resolution;  // interval between ticks
      ck_rwlock_t lock;
      struct {
        struct ph_timerwheel_list lists[PHENOM_WHEEL_SIZE];
      } buckets[4];
    };

ph_timerwheel provides 4 buckets, which carry over into one another much like hours, minutes and seconds.
Below, TV1 denotes buckets[0], and so on through TV4 for buckets[3].

TV1 is the first table and covers timers due in 1 ~ 255 ticks.
Because multiple timers may be waiting to expire on the same tick,
all timers on a tick are chained into a ph_timerwheel_list so they can be processed in order on expiry.

TV2 is the second table, covering 256 ~ 65535 ticks;
TV3 and TV4 continue the pattern (see the sketch below).
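
In other words, the level a timer lands in is determined by how many ticks remain until it is due. A sketch of the selection, derived from PHENOM_WHEEL_BITS = 8 above (not copied from corelib/timerwheel.c):

    #include <stdint.h>

    // which TV a timer belongs in, given the ticks remaining until expiry
    static int wheel_level(uint64_t due_in_ticks)
    {
      if (due_in_ticks < (1ULL << 8))  return 0;  // TV1: 1 ~ 255 ticks
      if (due_in_ticks < (1ULL << 16)) return 1;  // TV2: 256 ~ 65535 ticks
      if (due_in_ticks < (1ULL << 24)) return 2;  // TV3
      return 3;                                   // TV4
    }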

In the nbio subsystem tick_resolution = 100ms; every 100ms each event loop fires the ph_timerwheel_tick function,
which handles all the timers on the next tick.

    ph_timerwheel_tick(ph_timerwheel_t *wheel,
        struct timeval now,
        ph_timerwheel_should_dispatch_func_t should_dispatch,
        ph_timerwheel_dispatch_func_t dispatch,
        void *arg)

idx is the index used to walk TV1. Each iteration of the loop, idx locates the tick currently due and processes every expired timer on that tick.
wheel->next_run advances by one tick_resolution per iteration, and the index moves forward with it. When the index wraps around to 0, TV1 has completed a full revolution
and every timer in TV1 has been handled, so a cascade moves timers from the later lists (TV2, TV3, ...) forward, much like converting minutes into seconds.
This layered timer-list scheme greatly reduces the cost of each timeout check: each tick only needs to inspect TV1, and a cascade happens only when necessary.

One drawback of the timer wheel is the cost of a cascade. In the extreme case several TVs need cascading at the same time, producing significant latency.
This is why timeout-style timers are the main use case for timer wheels, or put another way, why the timer wheel is optimized for timeout timers:
such timers mostly guard against error conditions, which rarely occur, so they are usually deleted before they expire and never pay the cascade cost.

In the nbio subsystem, initialization runs ph_nbio_init -> ph_timerwheel_init(&emitters[i].wheel, me->now, WHEEL_INTERVAL_MS).
In ph_nbio_emitter_init, each emitter creates a timerfd; after 100ms the timer expires, the timerfd becomes readable, and the callback tick_epoll fires:

    emitter->timer_fd = timerfd_create(
        CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
    if (emitter->timer_fd == -1) {
      ph_panic("timerfd_create(CLOCK_MONOTONIC) failed: `Pe%d", errno);
    }

    memset(&ts, 0, sizeof(ts));
    ts.it_interval.tv_nsec = WHEEL_INTERVAL_MS * 1000000;
    ts.it_value.tv_nsec = ts.it_interval.tv_nsec;
    timerfd_settime(emitter->timer_fd, 0, &ts, NULL);

    ph_job_init(&emitter->timer_job);
    emitter->timer_job.callback = tick_epoll;
    emitter->timer_job.fd = emitter->timer_fd;
    emitter->timer_job.data = emitter;
    emitter->timer_job.emitter_affinity = emitter->emitter_id;
    ph_job_set_nbio(&emitter->timer_job, PH_IOMASK_READ, 0);

Call sequence: ph_nbio_emitter_init -> ph_job_set_nbio -> tick_epoll -> ph_nbio_emitter_timer_tick -> ph_timerwheel_tick

hash table

Source: include/phenom/hashtable.h; corelib/hash; tests/hashtable.c

    struct ph_ht {
      uint32_t nelems;
      uint64_t table_size, elem_size, mask;
      const struct ph_ht_key_def *kdef;
      const struct ph_ht_val_def *vdef;
      /* points to the table, an array of table_size elements */
      char *table;
    };

    ph_result_t ph_ht_init(ph_ht_t *ht, uint32_t size_hint,
        const struct ph_ht_key_def *kdef,
        const struct ph_ht_val_def *vdef)
    {
      ht->kdef = kdef;
      ht->vdef = vdef;
      ht->nelems = 0;
      ht->table_size = ph_power_2(size_hint * 2);
      ht->elem_size = sizeof(struct ph_ht_elem) + kdef->ksize + vdef->vsize;
      ht->mask = ht->table_size - 1;
      ht->table = ph_mem_alloc_size(mt_table, ht->elem_size * ht->table_size);
      if (!ht->table) {
        return PH_NOMEM;
      }
      return PH_OK;
    }

The implementation uses linear probing. The bucket count is passed in at ph_ht_init time.
When the table is full, insert fails; there is no automatic rehashing,
so ph_ht_grow must be called explicitly to grow the table by hand.
ph_hash_bytes_murmur implements the Murmur hash algorithm.
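
As a generic illustration of linear probing (simplified, not phenom's actual element layout): on a collision the probe advances to the next slot, wrapping via the mask. Since nothing rehashes automatically, a full table makes inserts fail until ph_ht_grow is called:

    #include <stdint.h>

    // simplified inline element; phenom stores key and value bytes
    // directly after a small header inside ht->table
    struct elem { uint64_t hash; int in_use; /* key/value bytes follow */ };

    static struct elem *probe(struct elem *table, uint64_t mask, uint64_t hash)
    {
      uint64_t idx = hash & mask;

      // the real code bounds this walk: insert fails before the
      // table can become completely full
      for (;;) {
        struct elem *e = &table[idx];

        if (!e->in_use || e->hash == hash) {
          return e;              // empty slot (miss) or candidate match
        }
        idx = (idx + 1) & mask;  // linear probe: next slot, wrapping
      }
    }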

phenom threads

Source: include/phenom/thread.h; corelib/thread.c

    struct ph_thread {
      bool refresh_time;

      // internal monotonic thread id
      uint32_t tid;

      PH_STAILQ_HEAD(pdisp, ph_job) pending_nbio, pending_pool;

      struct ph_nbio_emitter *is_emitter;
      int is_worker;
      struct timeval now;

      ck_epoch_record_t epoch_record;
      ck_hs_t counter_hs;

      // linkage so that a stat reader can find all counters
      ck_stack_entry_t thread_linkage;

      // OS level representation
      pthread_t thr;

      // If part of a pool, linkage in that pool
      CK_LIST_ENTRY(ph_thread) pool_ent;

      pid_t lwpid;

    #ifdef HAVE_STRERROR_R
      char strerror_buf[128];
    #endif

      // Name for debugging purposes
      char name[16];
    };

A phenom thread records:

  • the pending NBIO job queue
  • the pending pool job queue
  • the pthread_t thread id
  • its linkage node within a pool
  • a name

Each phenom thread is assigned a globally unique id and corresponds to one pthread. As the comments explain, phenom threads with tid < MAX_RINGS are called preferred threads and own a dedicated job queue; the other threads contend for a shared queue, synchronized with a spinlock.

The global pools list keeps every thread pool in a linked list. A pool contains the structures used for consumer and producer wait/wakeup (a futex or condition variable), the ring buffer that holds jobs, pointers to the worker threads, and so on.

ph_thread_spawn(func, arg) creates a ph_thread_t thread. Under the hood it calls pthread_create() to run ph_thread_boot(), passing along the actual function func(), the argument arg and related information. ph_thread_boot() allocates and initializes a new ph_thread_t structure, then calls the supplied func().

It also wraps join, self, setaffinity and the other pthread operations.

pthread_key_t

Threads in a process share almost everything apart from their own stack and registers. What if a thread wants to maintain a "global" variable that belongs only to itself?
Thread-local storage solves this problem.

  • Declare a variable of type pthread_key_t.
  • Call pthread_key_create() to create the key. It takes two parameters: the first is the
    pthread_key_t variable declared above, the second is a cleanup function that is invoked
    when a thread releases its stored value. The function pointer may be set to NULL, in
    which case the system performs the default cleanup.
  • When a thread needs to store its own value, call pthread_setspecific(). It takes two
    parameters: the pthread_key_t variable and a void* value, so a value of any type can
    be stored.
  • To retrieve the stored value, call pthread_getspecific(), which takes the pthread_key_t
    variable and returns a void* value.

A pthread_key_t is visible to all threads no matter which thread created it, i.e.
pthread_key_create() is needed only once per process. It looks like a global variable,
but only the key is global: each thread has its own associated value, set and read via
pthread_setspecific() and pthread_getspecific().

The ph_thread_self function obtains the thread's own handle in exactly this way.
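
A standalone illustration of the pattern (plain pthreads, not phenom code):

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_key_t tls_key;
    static pthread_once_t tls_once = PTHREAD_ONCE_INIT;

    // called automatically when a thread exits with a non-NULL value
    static void tls_destructor(void *value)
    {
      free(value);
    }

    static void tls_init(void)
    {
      // once per process, even though every thread passes through here
      pthread_key_create(&tls_key, tls_destructor);
    }

    // each thread gets its own counter, located through the shared key
    static int *my_counter(void)
    {
      int *v;

      pthread_once(&tls_once, tls_init);
      v = pthread_getspecific(tls_key);
      if (!v) {
        v = calloc(1, sizeof(*v));
        pthread_setspecific(tls_key, v);
      }
      return v;
    }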

JOB

There are 3 kinds of jobs:

  • Immediate. The work is dispatched immediately on the calling thread. Interface: ph_job_dispatch_now.
  • NBIO. The work is dispatched when a descriptor is signalled for I/O.
  • Pool. The work is queued to a thread pool and is dispatched as soon as a worker becomes available.
    libPhenom allows multiple pools to be defined to better partition and prioritize your workload.

NBIO

Source

  • include/phenom/job.h
  • tests/bench/iopipes.c
  • tests/iobasic.c
  • tests/timer.c
  • corelib/nbio directory

ph_nbio_init() initializes NBIO:

  • calloc() allocates num_schedulers emitters (struct ph_nbio_emitter); each emitter is tied to one event loop and bound to one thread.
  • Each emitter and its timer wheel are initialized (ph_timerwheel_init(), ph_nbio_emitter_init()):
    • the timer_fd descriptor (timerfd_create())
    • timer_job is pushed onto the pending_nbio queue; when this job is scheduled it runs tick_epoll
  • Counters used for statistics are initialized; they can be observed with ph_nbio_stat.

Each emitter is bound to one event loop:

    struct ph_nbio_emitter {
      ph_timerwheel_t wheel;      // timer wheel
      ph_job_t timer_job;
      uint32_t emitter_id;
      struct timeval last_dispatch;
      int io_fd, timer_fd;
      ph_nbio_affine_job_stailq_t affine_jobs;   // typedef PH_STAILQ_HEAD(affine_ent, ph_nbio_affine_job)
      ph_job_t affine_job;
      ph_pingfd_t affine_ping;    // used to wake up epoll
      ph_thread_t *thread;        // bound to this thread
      ph_counter_block_t *cblock; // counters
    };

    struct ph_job {
      // data associated with job
      void *data;
      // the callback to run when the job is dispatched
      ph_job_func_t callback;
      // deferred apply list
      PH_STAILQ_ENTRY(ph_job) q_ent;
      // whether we're in a deferred apply
      bool in_apply;
      // for PH_RUNCLASS_NBIO, trigger mask
      ph_iomask_t mask;
      // use ph_job_get_kmask() to interpret
      int kmask;
      // Hashed over the scheduler threads; two jobs with
      // the same emitter hash will run serially wrt. each other
      uint32_t emitter_affinity;
      // For nbio, the socket we're bound to for IO events
      ph_socket_t fd;
      // Holds timeout state
      struct ph_timerwheel_timer timer;
      // When targeting a thread pool, which pool
      ph_thread_pool_t *pool;
      // for SMR
      ck_epoch_entry_t epoch_entry;
      struct ph_job_def *def;
    };

ph_sched_run drives NBIO:

  • sched_loop() runs on each of the emitters' threads; emitters[0] runs on the caller of ph_sched_run.
  • sched_loop calls ph_nbio_emitter_run.
  • ph_nbio_emitter_run enters Reactor mode: epoll_wait yields fds, and ph_job_t *job = event[i].data.ptr;
  • ph_nbio_emitter_dispatch_immediate invokes the callback the job registered earlier.
  • Timer jobs are triggered through the timerfd.

Adding a job to NBIO (see the sketch after this list):

  • A job is added via ph_job_set_nbio (ph_job_set_nbio_timeout_in ends up calling ph_job_set_nbio):

    • if the current me.is_worker == 0, ph_nbio_emitter_apply_io_mask runs and the fd joins the epoll_wait set;
    • otherwise the job is placed on the me->pending_nbio queue.
  • Jobs on the pending_nbio queue enter the event loop via ph_sched_run -> process_deferred -> ph_nbio_emitter_apply_io_mask.

  • For jobs added after sched_run has started: ph_nbio_emitter_run -> ph_job_pool_apply_deferred_items -> process_deferred.
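
Stitched together, watching a descriptor looks much like the timer_job setup quoted earlier; the callback signature matches connect_complete in the socket section below. The re-arm after dispatch is an assumption about one-shot semantics:

    #include "phenom/job.h"

    static ph_job_t read_job;

    static void on_readable(ph_job_t *job, ph_iomask_t why, void *data)
    {
      (void)data;
      if (why & PH_IOMASK_READ) {
        // drain job->fd here ...
      }
      // assumed one-shot: re-apply the mask to keep receiving events
      ph_job_set_nbio(job, PH_IOMASK_READ, 0);
    }

    static void watch_fd(ph_socket_t fd)
    {
      ph_job_init(&read_job);
      read_job.callback = on_readable;
      read_job.fd = fd;
      read_job.data = NULL;
      ph_job_set_nbio(&read_job, PH_IOMASK_READ, 0);
    }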

Thread Pool

Source: include/phenom/thread.h; corelib/job.h; corelib/job.c; tests/tpool.c

    struct ph_thread_pool {
      struct ph_thread_pool_wait consumer CK_CC_CACHELINE;
      uint32_t max_queue_len;

      ck_ring_t *rings[MAX_RINGS+1];
      intptr_t used_rings;

      ck_spinlock_t lock CK_CC_CACHELINE;
      char pad1[CK_MD_CACHELINE - sizeof(ck_spinlock_t)];

      struct ph_thread_pool_wait producer CK_CC_CACHELINE;
      int stop;

      char *name;
      ph_counter_scope_t *counters;
      CK_LIST_ENTRY(ph_thread_pool) plink;
      ph_thread_t **threads;
      uint32_t max_workers;
      uint32_t num_workers;
      ph_variant_t *config;
    };

Job dispatch (a sketch appears after the processing notes below):
* ph_thread_pool_define defines a pool.
* ph_job_set_pool sets job->pool = pool to associate the job with the pool, then PH_STAILQ_INSERT_TAIL(&me->pending_pool, job, q_ent) places it on the current thread's queue.
* Execution flows ph_sched_run -> process_deferred -> _ph_job_set_pool_immediate -> do_set_pool.
* Threads with tid < MAX_RINGS have their own ring; the rest share one ring.
* wake_pool(&pool->consumer) notifies a worker thread.

Job processing:
* ph_sched_run -> _ph_job_pool_start_threads -> ph_thread_pool_start_workers -> worker_thread

The ph_thread_pool_signal_stop function shuts a pool down.
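
A dispatch sketch; ph_thread_pool_define's parameter list (name, max queue length, worker count) and ph_job_set_pool's signature are assumptions to verify against include/phenom/job.h:

    #include "phenom/job.h"

    static ph_thread_pool_t *pool;
    static ph_job_t work;

    static void do_work(ph_job_t *job, ph_iomask_t why, void *data)
    {
      (void)job; (void)why; (void)data;
      // runs on one of the pool's worker threads
    }

    static void submit(void)
    {
      // assumed signature: name, max queue length, number of workers
      pool = ph_thread_pool_define("myapp-pool", 1024, 4);

      ph_job_init(&work);
      work.callback = do_work;
      ph_job_set_pool(&work, pool);  // pending_pool now, ring + wake_pool later
    }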

socket

Source:

  • include/phenom/socket.h
  • corelib/net directory
  • tests/sockaddr.c

libphenom wraps socket IO. It provides the descriptor type ph_socket_t and the generic address structure ph_sockaddr_t.
ph_sock_t bundles the read/write buffers, the job structure used for NBIO, the timeout duration, the callback fired when events occur, and more.
ph_sock_t objects are managed by the NBIO pool.

The flow of resolving a name and initiating a connection:

    struct resolve_and_connect {
      ph_sockaddr_t addr;
      ph_socket_t s;
      int resolve_status;
      int connect_status;
      uint16_t port;
      struct timeval start, timeout, elapsed;
      void *arg;
      ph_sock_connect_func func;
    };

    def ph_sock_resolve_and_connect(name, port, timeout, resolver, func, arg):
        rac = ph_mem_alloc(mt.resolve_and_connect)
        rac.func = func
        rac.arg = arg
        rac.start = ph_time_now()
        rac.port = port
        if timeout:
          rac.timeout = timeout
        else:
          rac.timeout = 60  # default 60s timeout
        if ph_sockaddr_set_v4(rac.addr, name, port) == PH_OK:  # name is already an IP address
          attempt_connect(rac)
          return
        # resolve the name in whatever way the resolver argument selects
        rac.addr = dns_getaddrinfo(resolver)
        attempt_connect(rac)

    def attempt_connect(rac):
        # create the socket object
        rac.s = ph_socket_for_addr(rac.addr, SOCK_STREAM, PH_SOCK_CLOEXEC|PH_SOCK_NONBLOCK)
        ph_socket_connect(rac.s, rac.addr, rac.timeout, connected_sock, rac)

    struct connect_job {
      ph_job_t job;
      ph_socket_t s;
      ph_sockaddr_t addr;
      int status;
      struct timeval start;
      void *arg;
      ph_socket_connect_func func;
    };

    def ph_socket_connect(s, addr, timeout, func, arg):
        # connect_job_template = { callback = connect_complete, memtype = mt.connect_job }
        job = (struct connect_job*)ph_job_alloc(connect_job_template)
        job.s, job.addr, job.func, job.arg = s, addr, func, arg
        job.start = ph_time_now()
        res = connect(s, job.addr, ...)   # man 2 connect
        if (...):
          # if fd s is in non-blocking mode, use the event callback
          # mechanism; the callback is connect_complete
          job.job.fd = s
          job.job.callback = connect_complete
          job.job.data = job
          ph_job_set_nbio_timeout_in(&job->job, PH_IOMASK_WRITE,
              timeout ? *timeout : default_timeout)
          return
        # synchronous IO: call connected_sock directly
        done = now - job.start
        func(s, addr, res == 0 ? 0 : errno, done, arg)

    def connect_complete(ph_job_t *j, ph_iomask_t why, void *data):
        struct connect_job *job = data
        if why == PH_IOMASK_TIME:
          status = ETIMEDOUT
        # invoke the previously registered function, connected_sock
        job.func(job.s, job.addr, status, done, job.arg)

    def connected_sock(s, addr, status, elapsed, arg):
        struct resolve_and_connect *rac = arg
        sock = ph_sock_new_from_socket(s, NULL, addr)
        calc_elapsed(rac)
        # invoke the user-defined callback, of type ph_sock_connect_func
        rac.func(sock, PH_SOCK_CONNECT_SUCCESS, 0, addr, rac.elapsed, rac.arg)

ph_sock_t, the abstraction of a single socket connection:

    struct ph_sock {
      // Embedded job so we can participate in NBIO
      ph_job_t job;

      // Buffers for output, input
      ph_bufq_t *wbuf, *rbuf;

      // The per IO operation timeout duration
      struct timeval timeout_duration;

      // A stream for writing to the underlying connection
      ph_stream_t *conn;

      // A stream representation of myself.  Writing bytes into the
      // stream causes the data to be buffered in wbuf
      ph_stream_t *stream;

      // Dispatcher
      ph_sock_func callback;

      bool enabled;

      // sockname, peername as seen from this host.
      // These correspond to the raw connection we see; if we are
      // proxied, these are the names of our connection to the proxy.
      // If we are not proxied, these are the same as the equivalents below
      ph_sockaddr_t via_sockname, via_peername;

      // sockname, peername as seen from the connected peer
      // These are the actual outgoing address endpoints, independent of
      // any proxying that may be employed
      ph_sockaddr_t sockname, peername;

      // If we've switched up to SSL, holds our SSL context
      SSL *ssl;
      ph_stream_t *ssl_stream;
      ph_sock_openssl_handshake_func handshake_cb;
      ph_bufq_t *sslwbuf;
    };

    # Creating a ph_sock_t; called from connected_sock and accept_dispatch
    def ph_sock_new_from_socket(ph_socket_t s, ph_sockaddr_t *sockname, ph_sockaddr_t *peername):
        # sock_job_template = {sock_dispatch, mt.sock}; the allocated struct is a ph_sock_t
        sock = (ph_sock_t*)ph_job_alloc(&sock_job_template)
        # read/write buffers default to 128k
        max_buf = ph_config_query_int("$.socket.max_buffer_size", MAX_SOCK_BUFFER_SIZE)
        sock->wbuf = ph_bufq_new(max_buf)
        sock->rbuf = ph_bufq_new(max_buf)
        sock->conn = ph_stm_fd_open(s, 0, 0)
        sock->stream = ph_stm_make(&sock_stm_funcs, sock, 0, 0)
        # sockname records the local address, peername the remote address
        sock->sockname = *sockname
        sock->peername = *peername
        # default 60s timeout
        sock->timeout_duration.tv_sec = 60
        return sock

    # Joining nbio, taking a ph_sock_connect_func callback as the example
    def connect_cb(ph_sock_t *sock, ...):
        # set the callback and enable the sock
        sock->callback = remote_cb
        ph_sock_enable(sock, true)

    # When an event fires on the fd behind a ph_sock, nbio's entry point is
    def sock_dispatch(j, why, data):
        # ignoring the SSL code for now
        ph_sock_t *sock = (ph_sock_t*)j
        sock->conn->need_mask = 0
        # flush the data buffered in wbuf out to the fd
        try_send(sock)
        # read data from the kernel into rbuf
        try_read(sock)
        # compute the appropriate mask and invoke the user-registered callback
        # ....
        sock->callback(sock, why, data)

    // Releasing the ph_sock_t once the connection should be closed
    ph_sock_shutdown(sock, PH_SOCK_SHUT_RDWR);
    // if sock->job.data was malloc'd earlier, it must be freed here
    ph_mem_free(mt_state, state);
    ph_sock_free(sock);

Reading and writing a sock (a callback sketch follows the list):

  • ph_sock_new_from_socket calls ph_stm_fd_open and ph_stm_make to manage the fd through sock->conn.
  • sock_stm_funcs defines the socket's read/write operations.
  • Reading a sock actually reads sock->rbuf; see sock_stm_readv.
  • Writing a sock actually writes sock->wbuf; see sock_stm_writev.
  • The real IO on the fd happens in sock_dispatch, so call ph_sock_wakeup when data must go out immediately.
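
Putting the sock pieces together, a callback sketch loosely based on the echo example in the libphenom README (ph_sock_read_line, ph_buf_mem, ph_buf_len and ph_stm_write are recalled from there; verify against the headers):

    static void echo_cb(ph_sock_t *sock, ph_iomask_t why, void *data)
    {
      ph_buf_t *buf;

      (void)data;
      // on error or per-op timeout, tear the session down
      if (why & (PH_IOMASK_ERR | PH_IOMASK_TIME)) {
        ph_sock_shutdown(sock, PH_SOCK_SHUT_RDWR);
        ph_sock_free(sock);
        return;
      }

      // pulls one newline-terminated record out of sock->rbuf
      buf = ph_sock_read_line(sock);
      if (!buf) {
        return;  // no complete line yet; wait for more data
      }

      // writing to sock->stream buffers into wbuf; sock_dispatch flushes it
      ph_stm_write(sock->stream, ph_buf_mem(buf), ph_buf_len(buf), NULL);
      ph_buf_delref(buf);
    }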