8# 2013-12-5

来源:互联网 发布:普通话在线测试软件 编辑:程序博客网 时间:2024/05/16 10:10

1.概要

日期: 2013-12-5

服务器:8#

现象:登录无响应


2.分析

对日志进行分析,发现存在处理队列堆积的情况 。

2.1日志

最后一行日志:
[2013-12-05 16:13:44:671][线程1872][2][20][0][]接收到消息(5:505,S=0:0,D=0:0,cmdserial=886523434),进入处理队列1,队列元素个数为7304...


分析768和3164线程的活动.


[线程768]
[2013-12-05 15:11:14:593][线程768][2][20][0][]正在处理:删除待办事宜(5:529,S=0:0,D=0:0,cmdserial=555275923)...

[线程768]此后就没有日志。
没有输出处理结束信息。应该在消息处理函数内部阻塞的。

[线程3164]
[2013-12-05 15:11:13:828][线程3164][2][20][0][]请求"基础数据同步"(15009,cmdserial=0)处理成功.结果消息个数:0.,记录总数:0.耗时:1.00(秒).
[线程3164]在此之后就没有信息了,没有再处理消息了。

2.2代码

(1)消息调度函数

int HTX_Scheduler::handle_message_block(ACE_Message_Block *mb) {HOTFOX::instance()->server_info_.run_stat_[4]--;CWrappedMsg<> *wm = 0;memcpy(&wm, mb->rd_ptr(), sizeof(CWrappedMsg<>*));mb->rd_ptr(sizeof(CWrappedMsg<>*));time(&wm->sche_time_); ///< 设置被调度到的时间if (wm->msg->GetMsgAttr()&HIGH_PAYLOAD_MASK) {HOTFOX::instance()->server_info_.run_stat_[5]--;}if (wm->msg->GetMsgAttr()&HEARTBEAT_MASK)  {///< 心跳信号不处理delete wm;return 0;}if (HTX_PACKET_TRACER::instance()->trace_flag()) {const char *trace_id = wm->msg->GetParam(TRACE_ID_PARA_NAME);if (!ISEMPTY_STR(trace_id)) {HTX_PACKET_TRACER::instance()->add_log(atol(trace_id),"开始处理",0);}}if (is_debug_) OutputMsg(wm->msg);vector<CWrappedMsg<>*> out;vector<ACE_Message_Block*> cache_mb; ///< 获取的缓存的消息块    bool enable_cache  = false;bool has_cache = false; ///< 是否已经有缓存int ret = 0;::EnumMsgType msgtype = wm->msg->GetMsgType();::MSGID msgid = wm->msg->GetMsgID();unsigned long msgdomain = use_domain_ ? wm->msg->GetDomain() : 0;EnumMsgPriority priority = (EnumMsgPriority)wm->msg->GetMsgPriority();int s_t,d_t; unsigned __int64 s_id,d_id;wm->msg->GetSource64(s_t,s_id);wm->msg->GetDest64(d_t,d_id);unsigned long msg_length = wm->msg->GetLength();///< 执行路由int i_FwdFlag = wm->msg->GetFwdFlag();void *loc = 0; ///< loc!=0,只有在router_!=0时才可能if (router_)  { ///< 转发包直接转发 int reaction = 0;int ret = router_->HandleInput(wm,&loc,reaction);if (ret) {            if (wm->msg->GetMsgAttr()&SYNC_CALL_MASK&&(msgtype==MT_CONFIRMATION||msgtype==MT_RESPONSE)) {                HTX_SYNCALL::instance()->wake(wm->GetMsg(),ret);            }DEBUG_LOG(HTX_LOGGER::instance(),(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"(task_id=%d)已处理消息%d:%d(priority:%d),从(%d:%I64d)到(%d:%I64d),包大小:%d字节.处理结果:%d\n",this->scheduler_id_,msgtype,msgid,priority,s_t,s_id,d_t,d_id,msg_length,ret));delete wm;return 0;}}HTX_TSS::instance()->ClearError();HTX_TSS::instance()->SetHL(wm->msg->GetMsgAttr()&HIGH_PAYLOAD_MASK);short auto_resp_mq = 0;HTX_NETWORK::instance()->HookMsg(wm->msg); ///< 调用登记的挂钩函数MSGIDTABLE *tb = plugin_->Find(wm->msg->GetMsgID(),wm->msg->GetMsgType(),use_domain_ ? wm->msg->GetDomain() : 0);vector<void*>::iterator it;if (tb==0) {if (wm->msg->GetMsgAttr()&TRACE_MASK) { ///< 如果处理的是需要确认的消息,则调用消息确认器,移出此消息HTX_MSG_TRACER::instance()->remove(wm->msg->GetCmdSerial());}HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_WARNING,"没有对应的插件处理消息(domain:%d,type:%d,id:%d)(from %d:%I64d to %d:%I64d)\n",wm->msg->GetDomain(),wm->msg->GetMsgType(),wm->msg->GetMsgID(),s_t,s_id,d_t,d_id);auto_resp_mq = 1;goto LAB_DONE;}if (!wm->connid.empty()) {if (!wm->conn_trusted_) {if (tb->info->bNeedAuth) {HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_INFO,"因未经验证,忽略请求:%s(%d:%d,cmdserial=%u).\n",tb->info->strName.c_str(),wm->msg->GetMsgType(),wm->msg->GetMsgID(),wm->msg->GetCmdSerial());auto_resp_mq = 1;goto LAB_DONE;}}}    enable_cache = HTX_DATACACHE::instance()->enable_cache(msgtype,msgid,msgdomain); ///< 是否需要缓存(wm->msg)的响应if (enable_cache&&tb->obj.size()>1) { ///< 多个插件处理同一消息时不支持缓存enable_cache = false;}if (enable_cache) {has_cache = HTX_DATACACHE::instance()->get(msgtype,msgid,msgdomain,out,cache_mb)==0;}it = tb->obj.begin();while(it!=tb->obj.end()) {CPlugInModule *pm = (CPlugInModule*)*it;//tb->obj;try {IPlugInInfo* pii = pm->GetAbout();HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_INFO,"正在处理:%s(%d:%d,S=%d:%d,D=%d:%d,cmdserial=%u)...\n",tb->info->strName.c_str(),wm->msg->GetMsgType(),wm->msg->GetMsgID(),s_t,s_id,d_t,d_id,wm->msg->GetCmdSerial());///< 调度插件执行请求DISPATCH_RESULT or;time_t start = clock();try {if (!has_cache) ret = pm->Dispatch(wm, out,or);if (ret) {if (i_FwdFlag==RL_STRICT) or.auto_resp_mq_ = -1;}}catch(sens::dbexception& e){or.auto_resp_mq_ = -1;ret = sens::EC_DB_OPERATION;HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_ERROR,"插件%s处理\"%s\"(%d:%d)产生%s异常\n",pm->GetAbout()->GetDllName(),tb->info->strName.c_str(),wm->msg->GetMsgType(),wm->msg->GetMsgID(),e.what());}catch(...) {or.auto_resp_mq_ = -1;ret = -1;HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_ERROR,"插件%s处理\"%s\"(%d:%d)产生异常\n",pm->GetAbout()->GetDllName(),tb->info->strName.c_str(),wm->msg->GetMsgType(),wm->msg->GetMsgID());if (HOTFOX::instance()->server_info_.IsDebug()) throw;}auto_resp_mq = or.auto_resp_mq_;/// @note 如果没有指定返回消息.以下2种情况,处理结束后不自动生成确认包/// 1.入口消息是UDP/// 2.明确不需要自动确认(只对MT_REQUEST生成MT_CONFIRMATION包)if (out.empty()&&(wm->connid.empty()||!or.auto_resp_)) {/// 如果没有返回包,HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_INFO,"\"%s\"(%d,cmdserial=%u)处理完成,返回值:%d,结果消息个数:%d\n",tb->info->strName.c_str(),wm->msg->GetMsgID(),wm->msg->GetCmdSerial(),ret,out.size());goto LAB_DONE;}string errmsg;if (ret) { /// ret==0表示成功,其它返回值表示出错const ERROR_MAP_ITEM *ei = or.err_code_ ? HTX_PLUGIN_MGR::instance()->GetError(or.err_code_) : 0;if (or.err_msg_.empty()&&ei==0) {errmsg = "未定义错误";}else {errmsg = !or.err_msg_.empty() ? or.err_msg_.c_str() : ei->ErrorContent;}}if (HTX_PACKET_TRACER::instance()->trace_flag()) {const char *trace_id = wm->msg->GetParam(TRACE_ID_PARA_NAME);if (!ISEMPTY_STR(trace_id)) {HTX_PACKET_TRACER::instance()->add_log(atol(trace_id),"处理结束",0);}}///< 如果框架配置为自动确认,则对没有返回**确认**消息的**请求**自动生成一个确认消息msgtype = wm->msg->GetMsgType();if (out.empty()) { ///< 自动返回消息处理///< 3种会话模式支持:///< 如果是MT_REQUEST,则自动构造MT_CONFRIMATION返回包///< 如果是MT_INDICATION而且要求确认,则构造MT_RESPONSE返回包if(or.auto_resp_&&(msgtype==MT_REQUEST||(msgtype==MT_INDICATION&&(wm->msg->GetMsgAttr()&RECEIPT_MASK)))) {CWrappedMsg<> *nwm = new CWrappedMsg<>;nwm->msg = new CMsg;nwm->msg->SetMsgType(msgtype==MT_REQUEST ? MT_CONFIRMATION :MT_RESPONSE);nwm->msg->SetMsgID(wm->msg->GetMsgID());nwm->msg->SetDomain(wm->msg->GetDomain());SetMsgStatusCode(nwm->msg,or.err_code_ ? or.err_code_ : ret); ///< 如果or提供了错误码则使用该错误码,否则利用返回值.(兼容以前的代码)if (ret) SetMsgStatus(nwm->msg,errmsg.c_str());out.push_back(nwm);}}unsigned long nRec = 0;if (SUCCEED(ret)) {for (int i=0;i<out.size();i++) {CWrappedMsg<> *pwm = out[i];assert(pwm->msg!=0);unsigned short nRecordset = pwm->msg->GetRowsets();for (unsigned short j=0;j<nRecordset;j++) nRec += pwm->msg->GetRowset(j)->GetRSMeta(RST_ROW_CNT);}}double duration = (double)(clock() - start) / CLOCKS_PER_SEC;if (ret) {HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_ERROR,"请求\"%s\"(%d,cmdserial=%u)处理完成,返回值:%d,耗时:%.2f(秒),错误码:%d,错误描述:%s.\n",tb->info->strName.c_str(),wm->msg->GetMsgID(),wm->msg->GetCmdSerial(),ret,duration,or.err_code_,errmsg.c_str());}else {HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_INFO,"请求\"%s\"(%d,cmdserial=%u)处理成功.结果消息个数:%d.,记录总数:%u.耗时:%.2f(秒).\n",tb->info->strName.c_str(),wm->msg->GetMsgID(),wm->msg->GetCmdSerial(),out.size(),nRec,duration);}DEBUG_LOG(HTX_LOGGER::instance(),(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"*****总请求数:%u,待处理请求数:%u.\n",HOTFOX::instance()->server_info_.run_stat_[0].value(),HOTFOX::instance()->server_info_.run_stat_[4].value()));if (enable_cache&&!has_cache&&out.size()>0) {HTX_DATACACHE::instance()->put(msgtype,msgid,msgdomain,out);}///< 发送处理结果信息for(int i=0; i<out.size(); i++) {CWrappedMsg<> *pwm = out[i];if (pwm->mode_==EM_UNSPECIFIED)pwm->mode_ = wm->mode_;///< 按接收通道发送出去if (pwm->mode_==EM_UDP) {if (pwm->h_addrs.empty()) {HostAddress *addr = new HostAddress;addr->ip_ = wm->h_addrs[0]->ip_;addr->port_ = wm->h_addrs[0]->port_;pwm->h_addrs.push_back(addr);}}else {if (pwm->connid.empty()) pwm->connid.push_back(wm->connid[0]);pwm->service_id_ = wm->service_id_;pwm->acceptor_index_ = wm->acceptor_index_;}EnumMsgType msgtype2 = pwm->msg->GetMsgType(); ///< 返回的消息类型if (HTX_PACKET_TRACER::instance()->trace_flag()) {const char *trace_id = wm->msg->GetParam(TRACE_ID_PARA_NAME);if (!ISEMPTY_STR(trace_id)) {pwm->msg->AddParam(TRACE_ID_PARA_NAME,trace_id);string s = LogMsg("准备发送消息:%d,%d",pwm->msg->GetMsgType(),pwm->msg->GetMsgID());HTX_PACKET_TRACER::instance()->add_log(atol(trace_id),s.c_str(),0);}}if (msgtype2==MT_CONFIRMATION||msgtype2==MT_RESPONSE) { ///< 如果是确认消息后者是应答消息,按协议规则,请求必须是MT_REQUEST,MT_INDICATIONpwm->msg->SetMsgID(wm->msg->GetMsgID());pwm->msg->SetCmdSerial(wm->msg->GetCmdSerial());pwm->msg->SetDomain(wm->msg->GetDomain());int t; unsigned __int64 t_id;wm->msg->GetSource64(t,t_id);if (t!=0) pwm->msg->SetDest64(t,t_id); ///< @todo 能否不加if(t!=0)判定wm->msg->GetDest64(t,t_id);if (t!=0) pwm->msg->SetSource64(t,t_id);if (wm->msg->GetMsgAttr()&SYNC_CALL_MASK)pwm->msg->SetMsgAttr(SYNC_CALL_MASK);if (wm->msg->GetMsgAttr()&EXTEND1_MASK)pwm->msg->SetMsgAttr(EXTEND1_MASK);}if (pwm->msg->GetMsgID()==wm->msg->GetMsgID()) { ///< 入口消息的优先级别决定出口消息的优先级别///< 可能一个入口消息,产生多个出口消息pwm->msg->SetMsgPriority(wm->msg->GetMsgPriority());}///< 将信息写入发送队列if (is_debug_) OutputMsg(out[i]->msg);if (wm->flag&CWrappedMsg<>::ML_IGNORE_RESP) { ///< 如果请求的消息设置了忽略返回,则销毁生成的消息delete out[i];}else {HTX_NETWORK::instance()->SendMsg(out[i]);}}///< 如果未设置错误,且当前处理出现错误,则设置错误信息(允许插件设置处理过程的错误信息,如果未设置则框架自动设置)if (HTX_TSS::instance()->GetLastError()==0) {if (or.err_code_||ret) { ///< 设置当前处理错误码HTX_TSS::instance()->SetErrorCode(or.err_code_ ? or.err_code_ : ret);if (HTX_TSS::instance()->GetLastErrorLevel()<SEVERITY_ERROR)HTX_TSS::instance()->SetInfoLevel(SEVERITY_ERROR);}}if (HTX_MANAGER::instance()->GetAudit()) {(*HTX_MANAGER::instance()->GetAudit())(wm,out,or);}DEBUG_LOG(HTX_LOGGER::instance(),(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"消息(%d:%d)返回%d个消息.\n",wm->msg->GetMsgType(),wm->msg->GetMsgID(),out.size()));out.clear();if (SUCCEED(ret))HOTFOX::instance()->server_info_.run_stat_[1]++;elseHOTFOX::instance()->server_info_.run_stat_[2]++;}catch(...) {HTX_LOGGER::instance()->log(LO_STDOUT|LO_FILE,SEVERITY_ERROR,"插件%s处理%s(%d:%d)产生异常\n",pm->GetAbout()->GetDllName(),tb->info->strName.c_str(),wm->msg->GetMsgType(),wm->msg->GetMsgID());if (HOTFOX::instance()->server_info_.IsDebug()) {char fn[32];SYSTEMTIME sysTime;GetLocalTime(&sysTime);sprintf(fn,"log\\%d%02d%02d%02d%02d%02d_%d_%d.umx",/// @note 文件命令不严格,可能重复导致覆盖sysTime.wYear,sysTime.wMonth,sysTime.wDay,sysTime.wHour,sysTime.wMinute,sysTime.wSecond,wm->msg->GetMsgID(),wm->msg->GetMsgType());umxns::CUMX2 umx;umx.Attach(wm->msg);umx.Serialize();umx.Detach();DumpFile(fn,umx.GetBuffer(),umx.GetLength());throw;}}it++;}LAB_DONE:if (wm->msg->GetMsgAttr()&SYNC_CALL_MASK&&(msgtype==MT_CONFIRMATION||msgtype==MT_RESPONSE)) {HTX_SYNCALL::instance()->wake(wm->GetMsg(),ret);}if (router_&&loc) { int ret = router_->PostHandle(wm->msg,loc,auto_resp_mq);/// 是否回复由插件消息处理函数控制,默认时对成功的请求自动回复if( ret ){DEBUG_LOG(HTX_LOGGER::instance(),(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"消息(%d:%d)处理失败 PostHandle=%d!\n",wm->msg->GetMsgType(),wm->msg->GetMsgID(),ret));}}DEBUG_LOG(HTX_LOGGER::instance(),(LO_STDOUT|LO_FILE,SEVERITY_DEBUG,"消息(%d:%d)处理结束!\n",wm->msg->GetMsgType(),wm->msg->GetMsgID()));delete wm;return 0;}

(2)删除待办事宜记录(529-Request)处理函数

int CIMPlugin::OnDelPendingRequestRec(CWrappedMsg<> *in,vector<CWrappedMsg<> *> &out,DISPATCH_RESULT &or){CMsg *msg = in->msg;const char *recordno = msg->GetParam("RECNO");const char *msgtype = msg->GetParam("MSG_TYPE");const char *assno = msg->GetParam("ASS_NO");const char *user_serial = msg->GetParam("User_Serial");if(ISEMPTY_STR(recordno) && (ISEMPTY_STR(msgtype)||ISEMPTY_STR(assno))){return -1;}CWrappedMsg<> *pwm = new CWrappedMsg<>;CMsg * ans = new CMsg;pwm->msg = ans;ans->SetMsgType(MT_CONFIRMATION);SetMsgStatusCode(ans,CE_SUCCESS);out.push_back(pwm);GETDBC(pdbor, local_dbc_.c_str());if(pdbor==NULL){SetMsgStatusCode(ans,CE_CONNECTDB_ERR);return -1;}string sql;if(!ISEMPTY_STR(recordno))sql = LogMsg("delete from tb_0032 where object_id=%s",recordno);else{sql = LogMsg("delete from tb_0032 where f002v_0032='%s' and f001n_0032=%s",assno, msgtype);if(!ISEMPTY_STR(user_serial)){sql += LogMsg(" and userserial=%s", user_serial);}}bool br = pdbor->Execute(adCmdText,sql.c_str());if (!br) {SetMsgStatusCode(ans, CE_DATABASE);return -1;}return 0;}


(3)消息处理后挂钩函数

int CSEMQ::PostHandle(CMsg *msg,void *arg,int action) {    assert(arg!=0);    SEMQ_ACK_HELPER *loc = (SEMQ_ACK_HELPER*)arg;    if (action) {        ///< 写待确认记录失败可忽略,最坏的情况是导致重新发送        ///< 自动确认发生在本地服务器对需要确认的消息上,保存待确认的消息是为了之后平台可以询问        if (action!=-1) {            if (SaveAck(loc)==0) ///< @note 写入后loc中保存semq的object_id信息,Ack时需要使用                Ack(loc,0);        }        else {            if (redoar_) {                if (redoar_->Save(msg,GetThreadLastErrorText(),loc->seq_ctrl_key_.c_str(),loc->app_key_.c_str())==0) {                    if (SaveAck(loc)==0)                        Ack(loc,0);                    delete loc;                    return 0;                }            }            ///            Ack(loc,1); ///< 失败不做确认        }    }    delete loc;    return 0;}

对于线程768,日志中没有HTX_Scheduler::handle_message_block第184行代码的输出,说明阻塞在CIMPlugin::OnDelPendingRequestRec函数中。

对于OnDelPendingRequestRec,几乎只可能发生在第20行代码上。-----获取数据库连接


对于线程3164,日志是在HTX_Scheduler::handle_message_block第184行代码的输出后阻塞的.

第184行之后的代码中,最可能产生阻塞的是:第253行,第303行.

303上PostHandle需要访问数据库.


同时能够解释2个线程活动的是阻塞在获取数据库连接时.

但从代码上又没有看出缺陷.

那些持有连接的线程在做什么?

由于日志级别为SEVERITY_INFO,没有更详细的信息.


此问题待跟踪.


原创粉丝点击