memcached学习笔记(二)
来源:互联网 发布:猫池软件 编辑:程序博客网 时间:2024/05/21 11:28
经过这几天的学习,要将RDMA移植到memcached上,主要有下面3个方面的工作需要完成:
1、TCP连接到QP(Queue Pair)的创建
Memcached在建立网络连接时,只是使用socket创建了TCP的数据链路;而使用RDMA时,还需要在客户端和服务器端分别创建SQ(Send Queue,发送队列)和RQ(Receive Queue,接收队列),并创建指向已注册内存区域的指针。
2、Get和Set操作
数据库最主要的操作就是查询和插入,对应于Get和Set操作。客户端读写数据时首先会访问memcached服务器,对服务器Hash表中的kv对(key-value)进行操作,这里要将客户端和缓存服务器间的基于TCP的数据传输换成RDMA方式。上一次的总结结尾说过读写操作和我们这次比赛的关系不大,后来发现还是有关系的。
3、Update操作
在访问缓存服务器未命中时(第一次访问该数据,或者该数据已被LRU淘汰),从数据库读到的数据在返回给用户的同时还要写入缓存服务器,以便下次访问;缓存服务器中改动过的数据也要写回数据库,以保持数据的一致性。缓存服务器和数据库之间的数据传输也可以改成RDMA方式。
这几天具体看了下memcached数据处理的过程和get、set操作过程,跟大家分享一下。
Memcached在网络连接建立后,由Master线程将连接分发给Worker线程,Worker线程采用状态机的方式,根据连接当前所处的状态处理业务逻辑。memcached.h中定义的连接状态如下(共11种状态,另有一个仅用于断言的conn_max_state):
/**
 * States a connection can be in while it is being processed by the
 * connection state machine. The enumerators are listed in their original
 * order, so their numeric values are unchanged.
 */
enum conn_states {
    conn_listening,  /**< the socket which listens for connections */
    conn_new_cmd,    /**< prepare the connection for its next command */
    conn_waiting,    /**< waiting for a readable socket */
    conn_read,       /**< reading in a command line from the network */
    conn_parse_cmd,  /**< try to parse a command from the input buffer */
    conn_write,      /**< writing out a simple response */
    conn_nread,      /**< reading in a fixed number of bytes */
    conn_swallow,    /**< swallowing unnecessary bytes w/o storing them */
    conn_closing,    /**< closing this connection */
    conn_mwrite,     /**< writing out many items sequentially */
    conn_closed,     /**< connection is closed */
    conn_max_state   /**< max state value (used for assertion); not a real state */
};
状态转换关系如下图,有些状态没什么用,就没展现。
while(!stop){switch(c->state){case conn_listening:break;case conn_waiting:break; …}}
在这些状态中,conn_listening在上次总结中已经说过了;conn_closed和仅用于断言的conn_max_state没什么太大用处,就不说了。其余状态的处理代码如下:
case conn_new_cmd: /* Only process nreqs at a time to avoid starving other connections */ --nreqs; //全局变量,记录每个libevent实例处理的事件,通过初始启动参数配置 if (nreqs >= 0) {//还可以处理请求 reset_cmd_handler(c); } else {//拒绝请求 pthread_mutex_lock(&c->thread->stats.mutex); c->thread->stats.conn_yields++; pthread_mutex_unlock(&c->thread->stats.mutex); if (c->rbytes > 0) { /* We have already read in data into the input buffer, so libevent will most likely not signal read events on the socket (unless more data is available. As a hack we should just put in a request to write data, because that should be possible ;-) */ if (!update_event(c, EV_WRITE | EV_PERSIST)) {//更新libevent状态 if (settings.verbose > 0) fprintf(stderr, "Couldn't update event\n"); conn_set_state(c, conn_closing); //关闭连接 break; } } stop = true; } break; case conn_waiting: if (!update_event(c, EV_READ | EV_PERSIST)) {//更新libevent状态,也就是删除libevent事件后,重新注册libevent事件 if (settings.verbose > 0) fprintf(stderr, "Couldn't update event\n"); conn_set_state(c, conn_closing); break; } conn_set_state(c, conn_read); //进入读数据状态 stop = true; break; case conn_read: res = IS_UDP(c->transport) ? try_read_udp(c) : try_read_network(c); //判断采用UDP协议还是TCP协议 switch (res) { case READ_NO_DATA_RECEIVED: conn_set_state(c, conn_waiting); //未读取到数据,继续等待 break; case READ_DATA_RECEIVED: conn_set_state(c, conn_parse_cmd); //读取到数据,开始解析数据 break; case READ_ERROR: conn_set_state(c, conn_closing); //读取发生错误,关闭连接 break; case READ_MEMORY_ERROR: /* Failed to allocate more memory */ /* State already set by try_read_network *///申请内存空间错误,不做处理,继续尝试 break; } break; case conn_parse_cmd : if (try_read_command(c) == 0) {//如果读取到的数据不够,继续等待,等读取到的数据够了,再进行解析 /* wee need more data! 
*/ conn_set_state(c, conn_waiting); } break; case conn_nread: if (c->rlbytes == 0) { complete_nread(c); break; } /* Check if rbytes < 0, to prevent crash */ if (c->rlbytes < 0) { if (settings.verbose) { fprintf(stderr, "Invalid rlbytes to read: len %d\n", c->rlbytes); } conn_set_state(c, conn_closing); break; } /* first check if we have leftovers in the conn_read buffer */ if (c->rbytes > 0) { int tocopy = c->rbytes > c->rlbytes ? c->rlbytes : c->rbytes; if (c->ritem != c->rcurr) { memmove(c->ritem, c->rcurr, tocopy); } c->ritem += tocopy; c->rlbytes -= tocopy; c->rcurr += tocopy; c->rbytes -= tocopy; if (c->rlbytes == 0) { break; } } /* now try reading from the socket */ res = read(c->sfd, c->ritem, c->rlbytes); if (res > 0) { pthread_mutex_lock(&c->thread->stats.mutex); c->thread->stats.bytes_read += res; pthread_mutex_unlock(&c->thread->stats.mutex); if (c->rcurr == c->ritem) { c->rcurr += res; } c->ritem += res; c->rlbytes -= res; break; } if (res == 0) { /* end of stream */ conn_set_state(c, conn_closing); break; } if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) { if (!update_event(c, EV_READ | EV_PERSIST)) { if (settings.verbose > 0) fprintf(stderr, "Couldn't update event\n"); conn_set_state(c, conn_closing); break; } stop = true; break; } /* otherwise we have a real error, on which we close the connection */ if (settings.verbose > 0) { fprintf(stderr, "Failed to read, and not due to blocking:\n" "errno: %d %s \n" "rcurr=%lx ritem=%lx rbuf=%lx rlbytes=%d rsize=%d\n", errno, strerror(errno), (long)c->rcurr, (long)c->ritem, (long)c->rbuf, (int)c->rlbytes, (int)c->rsize); } conn_set_state(c, conn_closing); break; case conn_swallow: /* we are reading sbytes and throwing them away */ if (c->sbytes == 0) { conn_set_state(c, conn_new_cmd); break; } /* first check if we have leftovers in the conn_read buffer */ if (c->rbytes > 0) { int tocopy = c->rbytes > c->sbytes ? 
c->sbytes : c->rbytes; c->sbytes -= tocopy; c->rcurr += tocopy; c->rbytes -= tocopy; break; } /* now try reading from the socket */ res = read(c->sfd, c->rbuf, c->rsize > c->sbytes ? c->sbytes : c->rsize); if (res > 0) { pthread_mutex_lock(&c->thread->stats.mutex); c->thread->stats.bytes_read += res; pthread_mutex_unlock(&c->thread->stats.mutex); c->sbytes -= res; break; } if (res == 0) { /* end of stream */ conn_set_state(c, conn_closing); break; } if (res == -1 && (errno == EAGAIN || errno == EWOULDBLOCK)) { if (!update_event(c, EV_READ | EV_PERSIST)) { if (settings.verbose > 0) fprintf(stderr, "Couldn't update event\n"); conn_set_state(c, conn_closing); break; } stop = true; break; } /* otherwise we have a real error, on which we close the connection */ if (settings.verbose > 0) fprintf(stderr, "Failed to read, and not due to blocking\n"); conn_set_state(c, conn_closing); break; case conn_write: /* * We want to write out a simple response. If we haven't already, * assemble it into a msgbuf list (this will be a single-entry * list for TCP or a two-entry list for UDP). */ if (c->iovused == 0 || (IS_UDP(c->transport) && c->iovused == 1)) { if (add_iov(c, c->wcurr, c->wbytes) != 0) { if (settings.verbose > 0) fprintf(stderr, "Couldn't build response\n"); conn_set_state(c, conn_closing); break; } } /* fall through... 
*/ case conn_mwrite: if (IS_UDP(c->transport) && c->msgcurr == 0 && build_udp_headers(c) != 0) { if (settings.verbose > 0) fprintf(stderr, "Failed to build UDP headers\n"); conn_set_state(c, conn_closing); break; } switch (transmit(c)) { case TRANSMIT_COMPLETE: if (c->state == conn_mwrite) { conn_release_items(c); /* XXX: I don't know why this wasn't the general case */ if(c->protocol == binary_prot) { conn_set_state(c, c->write_and_go); } else { conn_set_state(c, conn_new_cmd); } } else if (c->state == conn_write) { if (c->write_and_free) { free(c->write_and_free); c->write_and_free = 0; } conn_set_state(c, c->write_and_go); } else { if (settings.verbose > 0) fprintf(stderr, "Unexpected state %d\n", c->state); conn_set_state(c, conn_closing); } break; case TRANSMIT_INCOMPLETE: case TRANSMIT_HARD_ERROR: break; /* Continue in state machine. */ case TRANSMIT_SOFT_ERROR: stop = true; break; } break; case conn_closing: if (IS_UDP(c->transport)) conn_cleanup(c); else conn_close(c); stop = true; break;
0 0
- memcached学习笔记(二)
- memcached学习笔记(二)
- Memcached 学习笔记(二)内存机制
- memcached学习(二)
- Memcached (参数说明)--学习笔记(二)
- Memcached 学习笔记(二)——ruby调用
- Memcached 学习笔记(一)
- memcached学习笔记(一)
- memcached学习笔记(三)
- memcached学习笔记(一)
- memcached学习笔记(三)
- Memcached学习笔记(1)
- Memcached-学习(二)XMemcached&Spring集成
- Memcached 入门学习二(命令操作)
- memcached学习二
- memcached学习笔记(1)——memcached原理
- memcached学习笔记(2)——memcached实践
- memcached学习笔记(1)——memcached原理
- 将所选radio buttons的值相加
- 视C++为一个语言联邦
- Linux笔记(46)——bash变量
- Linux下 undefined reference问题
- 虚析构函数(√)、纯虚析构函数(√)、虚构造函数(X)
- memcached学习笔记(二)
- C++创建单链表
- C/C++中的static关键字的总结
- JVM性能调优
- Android 时间轴实现(RecycleView更高效)
- 几何原本查询程序2.0
- linux IO子系统和文件系统读写流程
- 嵌入式项目远程控制-服务器端
- 单元测试主要的测试功能点