浅析lua异常捕获处理机制

来源:互联网 发布:网络修真小说排行榜 编辑:程序博客网 时间:2024/05/24 01:36
异常捕获是高级语言的一大特性,通过对异常的捕获和处理,可以有效提高系统的稳定性和健壮性。因为无论再怎样改进代码,都不可避免出现一些异常,例如文件io错误、网络错误、内存错误等等,就要求编码对错误进行捕获,同时打印日志以便开发人员跟进问题的处理。当然,lua也提供了接口用于捕获运行时异常。

lua异常捕获函数

lua有两个函数可用于捕获异常: pcall 和 xpcall,这两个函数很类似,都会在保护模式下执行函数,效果类似try-catch,可捕获并处理异常。
两个函数的原型如下:
pcall (func [, arg1, ···])xpcall (func, errfunc [, arg1, ···])
对比两个函数,xpcall多了一个异常处理函数参数 errfunc。对于pcall,异常处理完时只简单记录错误信息,然后释放调用栈空间,而对于xpcall,这个参数可用于在调用栈释放前跟踪到这些数据。效果如下:
> f=function(...) error(...) end> pcall(f, 123)false   stdin:1: 123> xpcall(f, function(e) print(debug.traceback()) return e end, 123)stack traceback:        stdin:1: in function <stdin:1>        [C]: in function 'error'        stdin:1: in function 'f'        [C]: in function 'xpcall'        stdin:1: in main chunk        [C]: in ?false   stdin:1: 123
值得注意的是,errfunc的传入参数是异常数据,函数结束时必须将这个数据返回,才能实现和 pcall 一样的返回值

lua异常捕获处理机制

下面,以 pcall 来说明lua异常捕获是怎么实现的。这里,lua代码版本取5.3.1

pcall的实现

pcall的c实现函数为luaB_pcall,如下:
// lbaselib.cstatic int luaB_pcall (lua_State *L) {  int status;  luaL_checkany(L, 1);  lua_pushboolean(L, 1);  // 如果没错误发生,返回true  lua_insert(L, 1);  /* put it in place */  status = lua_pcallk(L, lua_gettop(L) - 2, LUA_MULTRET, 0, 0, finishpcall);  return finishpcall(L, status, 0);}
看下lua_pcallk,这是pcall的预处理函数:
// lapi.cLUA_API int lua_pcallk (lua_State *L, int nargs, int nresults, int errfunc,                        lua_KContext ctx, lua_KFunction k) {  struct CallS c;  int status;  ptrdiff_t func;  lua_lock(L);  api_check(L, k == NULL || !isLua(L->ci),    "cannot use continuations inside hooks");  api_checknelems(L, nargs+1);  api_check(L, L->status == LUA_OK, "cannot do calls on non-normal thread");  checkresults(L, nargs, nresults);  if (errfunc == 0)    func = 0;  else {    StkId o = index2addr(L, errfunc);    api_checkstackindex(L, errfunc, o);    func = savestack(L, o);   }  c.func = L->top - (nargs+1);  // 取到pcall要执行的函数  if (k == NULL || L->nny > 0) {  /* no continuation or no yieldable? */    c.nresults = nresults;  /* do a 'conventional' protected call */    /* 处理pcall(非协程走这里,详见拓展阅读)*/    status = luaD_pcall(L, f_call, &c, savestack(L, c.func), func);   }  else {  //当resume协程时执行,已在保护模式下    CallInfo *ci = L->ci;    ci->u.c.k = k;  /* save continuation */    ci->u.c.ctx = ctx;  /* save context */    /* save information for error recovery */    ci->extra = savestack(L, c.func);    ci->u.c.old_errfunc = L->errfunc;    L->errfunc = func; // 记录异常处理函数    setoah(ci->callstatus, L->allowhook);  /* save value of 'allowhook' */    ci->callstatus |= CIST_YPCALL;  // 打标记设置协程恢复点    luaD_call(L, c.func, nresults, 1);  /* do the call */    ci->callstatus &= ~CIST_YPCALL; // 执行结束去掉标记    L->errfunc = ci->u.c.old_errfunc;    status = LUA_OK;  /* if it is here, there were no errors */  }  adjustresults(L, nresults);  lua_unlock(L);  return status;}
这里重点看下luaD_pcall 的实现,这是pcall的核心处理函数:
// ldo.c int luaD_pcall (lua_State *L, Pfunc func, void *u,                ptrdiff_t old_top, ptrdiff_t ef) {  int status;  CallInfo *old_ci = L->ci;  lu_byte old_allowhooks = L->allowhook;  unsigned short old_nny = L->nny;  ptrdiff_t old_errfunc = L->errfunc;  L->errfunc = ef; // 记录异常处理函数  status = luaD_rawrunprotected(L, func, u); // 以保护模式运行  if (status != LUA_OK) {  // 当异常发生时    StkId oldtop = restorestack(L, old_top); // ‘释放’调用栈    luaF_close(L, oldtop);  /* close possible pending closures */    seterrorobj(L, status, oldtop);    L->ci = old_ci;    L->allowhook = old_allowhooks;    L->nny = old_nny;    luaD_shrinkstack(L);  }  L->errfunc = old_errfunc;  return status;}
看下 luaD_rawrunprotected 函数的实现:
// ldo.c int luaD_rawrunprotected (lua_State *L, Pfunc f, void *ud) {  unsigned short oldnCcalls = L->nCcalls;  struct lua_longjmp lj;  lj.status = LUA_OK;  lj.previous = L->errorJmp;  /* chain new error handler */  L->errorJmp = &lj;  LUAI_TRY(L, &lj,    (*f)(L, ud);  ); // 异常宏处理  L->errorJmp = lj.previous;  // 设置错误跳转点  L->nCcalls = oldnCcalls;  return lj.status;}
LUAI_TRY 以及文章后面出现的LUAI_THROW 都是宏实现的,目的是兼容主流c或c++版本的异常处理语法,表现为try-catch的语法结构。简单说就是,执行代码前先try,执行过程出错就throw,然后在catch的地方处理异常。
/*** LUAI_THROW/LUAI_TRY define how Lua does exception handling. By** default, Lua handles errors with exceptions when compiling as** C++ code, with _longjmp/_setjmp when asked to use them, and with** longjmp/setjmp otherwise.*/#if !defined(LUAI_THROW) /* { */#if defined(__cplusplus) && !defined(LUA_USE_LONGJMP)/* { *//* C++ exceptions */#define LUAI_THROW(L,c) throw(c)#define LUAI_TRY(L,c,a) \ try { a } catch(...) { if ((c)->status == 0) (c)->status = -1; }#define luai_jmpbuf int  /* dummy variable */#elif defined(LUA_USE_POSIX) /* }{ *//* in POSIX, try _longjmp/_setjmp (more efficient) */#define LUAI_THROW(L,c) _longjmp((c)->b, 1)#define LUAI_TRY(L,c,a) if (_setjmp((c)->b) == 0) { a }#define luai_jmpbuf jmp_buf#else /* }{ *//* ISO C handling with long jumps */#define LUAI_THROW(L,c) longjmp((c)->b, 1)#define LUAI_TRY(L,c,a) if (setjmp((c)->b) == 0) { a }#define luai_jmpbuf jmp_buf#endif /* } */#endif /* } */
当异常出现时,status 就会赋值为-1,即不等于 LUA_OK


xpcall和pcall的区别

对比下xpcall对比pcall,有什么区别?
// lbaselib.cstatic int luaB_pcall (lua_State *L) {  int status;  luaL_checkany(L, 1);  lua_pushboolean(L, 1);  /* first result if no errors */  lua_insert(L, 1);  /* put it in place */  status = lua_pcallk(L, lua_gettop(L) - 2, LUA_MULTRET, 0, 0, finishpcall);  return finishpcall(L, status, 0);}static int luaB_xpcall (lua_State *L) {  int status;  int n = lua_gettop(L);  luaL_checktype(L, 2, LUA_TFUNCTION);  /* check error function */  lua_pushboolean(L, 1);  /* first result */  lua_pushvalue(L, 1);  // 将异常处理函数 errfunc 写入调用栈  lua_rotate(L, 3, 2);  /* move them below function's arguments */  status = lua_pcallk(L, n - 2, LUA_MULTRET, 2, 2, finishpcall);  return finishpcall(L, status, 2);}
再回头看下lua_pcallk 
// lapi.cLUA_API int lua_pcallk (lua_State *L, int nargs, int nresults, int errfunc,                        lua_KContext ctx, lua_KFunction k) {  struct CallS c;  int status;  ptrdiff_t func;  lua_lock(L);  api_check(L, k == NULL || !isLua(L->ci),    "cannot use continuations inside hooks");  api_checknelems(L, nargs+1);  api_check(L, L->status == LUA_OK, "cannot do calls on non-normal thread");  checkresults(L, nargs, nresults);  if (errfunc == 0)    func = 0; // pcall时 func为0  else {    StkId o = index2addr(L, errfunc);    api_checkstackindex(L, errfunc, o);    func = savestack(L, o); // xpcall取到异常处理函数 errfunc  }
然后, func 赋值给 L->errfunc,在异常发生时,就检查一下这个函数。

什么地方执行这个错误处理函数?
// ldebug.cl_noret luaG_errormsg (lua_State *L) {  if (L->errfunc != 0) {  // 如果有异常处理函数 errfunc     StkId errfunc = restorestack(L, L->errfunc);     setobjs2s(L, L->top, L->top - 1);  /* move argument */    setobjs2s(L, L->top - 1, errfunc);  /* push function */    L->top++;  /* assume EXTRA_STACK */    luaD_call(L, L->top - 2, 1, 0);  /* call it */  }  luaD_throw(L, LUA_ERRRUN);}
luaD_throw则是调用 LUAI_THROW,跳到前面catch的位置
// ldo.cl_noret luaD_throw (lua_State *L, int errcode) {  if (L->errorJmp) {  /* thread has an error handler? */    L->errorJmp->status = errcode;  /* set status */    LUAI_THROW(L, L->errorJmp);  /* jump to it */  }  else {  /* thread has no error handler */    global_State *g = G(L);    L->status = cast_byte(errcode);  /* mark it as dead */    if (g->mainthread->errorJmp) {  /* main thread has a handler? */      setobjs2s(L, g->mainthread->top++, L->top - 1);  /* copy error obj. */      luaD_throw(g->mainthread, errcode);  /* re-throw in main thread */    }    else {  /* no handler at all; abort */      if (g->panic) {  /* panic function? */        seterrorobj(L, errcode, L->top);  /* assume EXTRA_STACK */        if (L->ci->top < L->top)          L->ci->top = L->top;  /* pushing msg. can break this invariant */        lua_unlock(L);        g->panic(L);  /* call panic function (last chance to jump out) */      }      abort();    }  }}

两种情况会触发这个函数
1、主动调用error时:
// lapi.cLUA_API int lua_error (lua_State *L) {  lua_lock(L);  api_checknelems(L, 1);  luaG_errormsg(L);  /* code unreachable; will unlock when control actually leaves the kernel */  return 0;  /* to avoid warnings */}
2、出现运行时错误时:
// ldebug.cl_noret luaG_runerror (lua_State *L, const char *fmt, ...) {  CallInfo *ci = L->ci;  const char *msg;  va_list argp;  va_start(argp, fmt);  msg = luaO_pushvfstring(L, fmt, argp);  /* format message */  va_end(argp);  if (isLua(ci))  /* if Lua function, add source:line information */    luaG_addinfo(L, msg, ci_func(ci)->p->source, currentline(ci));  luaG_errormsg(L);}


拓展阅读

lua state之nny 

在lua state中,nny记录了调用栈上不能被挂起的次数,定义在 lua_State结构:
struct lua_State {  CommonHeader;  lu_byte status;  StkId top;  /* first free slot in the stack */  global_State *l_G;  CallInfo *ci;  /* call info for current function */  const Instruction *oldpc;  /* last pc traced */  StkId stack_last;  /* last free slot in the stack */  StkId stack;  /* stack base */  UpVal *openupval;  /* list of open upvalues in this stack */  GCObject *gclist;  struct lua_State *twups;  /* list of threads with open upvalues */  struct lua_longjmp *errorJmp;  /* current error recover point */  CallInfo base_ci;  /* CallInfo for first level (C calling Lua) */  lua_Hook hook;  ptrdiff_t errfunc;  /* current error handling function (stack index) */  int stacksize;  int basehookcount;  int hookcount;  unsigned short nny;  /* number of non-yieldable calls in stack */  unsigned short nCcalls;  /* number of nested C calls */  lu_byte hookmask;  lu_byte allowhook;};
为什么lua state需要这个字段?
这是因为pcall调用函数中还可以有pcall,支持多层嵌套,所以需要记录一个次数值,在pcall执行前+1,结束后-1
/*** Call a function (C or Lua). The function to be called is at *func.** The arguments are on the stack, right after the function.** When returns, all the results are on the stack, starting at the original** function position.*/void luaD_call (lua_State *L, StkId func, int nResults, int allowyield) {  fprintf(stderr, "luaD_call %d\n", allowyield);  if (++L->nCcalls >= LUAI_MAXCCALLS) {    if (L->nCcalls == LUAI_MAXCCALLS)      luaG_runerror(L, "C stack overflow");    else if (L->nCcalls >= (LUAI_MAXCCALLS + (LUAI_MAXCCALLS>>3)))      luaD_throw(L, LUA_ERRERR);  /* error while handing stack error */  }  if (!allowyield) L->nny++;  if (!luaD_precall(L, func, nResults))  /* is a Lua function? */    luaV_execute(L);  /* call it */  if (!allowyield) L->nny--;  L->nCcalls--;}
这样,通过判断 nny的值,就可以知道当前过程能否挂起。
细心的同学会发现,luaD_call是在pcall的执行过程调用到的,为什么 L->nny会大于0?
这是因为lua 虚拟机(lua state)启动时 L->nny 就赋值1了
// lstate.cLUA_API lua_State *lua_newstate (lua_Alloc f, void *ud) {  int i;  lua_State *L;  global_State *g;  LG *l = cast(LG *, (*f)(ud, NULL, LUA_TTHREAD, sizeof(LG)));  if (l == NULL) return NULL;  L = &l->l.l;  g = &l->g;  L->next = NULL;  L->tt = LUA_TTHREAD;  g->currentwhite = bitmask(WHITE0BIT);  L->marked = luaC_white(g);  preinit_thread(L, g);  // 预处理协程时将 L->nny 赋值1  g->frealloc = f;  g->ud = ud;  g->mainthread = L;  g->seed = makeseed(L);  g->gcrunning = 0;  /* no GC while building state */  g->GCestimate = 0;  g->strt.size = g->strt.nuse = 0;  g->strt.hash = NULL;  setnilvalue(&g->l_registry);  luaZ_initbuffer(L, &g->buff);  g->panic = NULL;  g->version = NULL;  g->gcstate = GCSpause;  g->gckind = KGC_NORMAL;  g->allgc = g->finobj = g->tobefnz = g->fixedgc = NULL;  g->sweepgc = NULL;  g->gray = g->grayagain = NULL;  g->weak = g->ephemeron = g->allweak = NULL;  g->twups = NULL;  g->totalbytes = sizeof(LG);  g->GCdebt = 0;  g->gcfinnum = 0;  g->gcpause = LUAI_GCPAUSE;  g->gcstepmul = LUAI_GCMUL;  for (i=0; i < LUA_NUMTAGS; i++) g->mt[i] = NULL;  if (luaD_rawrunprotected(L, f_luaopen, NULL) != LUA_OK) {    /* memory allocation error: free partial state */    close_state(L);    L = NULL;  }  return L;}
看下preinit_thread 函数:
// lstate.c/*** preinitialize a thread with consistent values without allocating** any memory (to avoid errors)*/static void preinit_thread (lua_State *L, global_State *g) {  G(L) = g;  L->stack = NULL;  L->ci = NULL;  L->stacksize = 0;  L->twups = L;  /* thread has no upvalues */  L->errorJmp = NULL;  L->nCcalls = 0;  L->hook = NULL;  L->hookmask = 0;  L->basehookcount = 0;  L->allowhook = 1;  resethookcount(L);  L->openupval = NULL;  L->nny = 1; //将 L->nny 赋值1  L->status = LUA_OK;  L->errfunc = 0;}
那协程 lua_newthread时也会调用 preinit_thread ,但是两者对于pcall的实现却有不少的差异。
如下,协程创建时调用 preinit_thread 预处理协程:
// lstate.cLUA_API lua_State *lua_newthread (lua_State *L) {  global_State *g = G(L);  lua_State *L1;  lua_lock(L);  luaC_checkGC(L);  /* create new thread */  L1 = &cast(LX *, luaM_newobject(L, LUA_TTHREAD, sizeof(LX)))->l;  L1->marked = luaC_white(g);  L1->tt = LUA_TTHREAD;  /* link it on list 'allgc' */  L1->next = g->allgc;  g->allgc = obj2gco(L1);  /* anchor it on L stack */  setthvalue(L, L->top, L1);  api_incr_top(L);  preinit_thread(L1, g);  // 预处理协程时将 L->nny 赋值1  L1->hookmask = L->hookmask;  L1->basehookcount = L->basehookcount;  L1->hook = L->hook;  resethookcount(L1);  /* initialize L1 extra space */  memcpy(lua_getextraspace(L1), lua_getextraspace(g->mainthread),         LUA_EXTRASPACE);  luai_userstatethread(L, L1);  stack_init(L1, L);  /* init stack */  lua_unlock(L);  return L1;}
但是,协程在lua中是这样调用的:
local co = coroutine.create(func)coroutine.resume(co)
再看下 resume的实现:
LUA_API int lua_resume (lua_State *L, lua_State *from, int nargs) {  int status;  int oldnny = L->nny;  /* save "number of non-yieldable" calls */  lua_lock(L);  luai_userstateresume(L, nargs);  L->nCcalls = (from) ? from->nCcalls + 1 : 1;  L->nny = 0;  // 将 nny赋值 0,允许挂起  api_checknelems(L, (L->status == LUA_OK) ? nargs + 1 : nargs);  status = luaD_rawrunprotected(L, resume, &nargs); // 在保护模式启动协程  if (status == -1)  /* error calling 'lua_resume'? */    status = LUA_ERRRUN;  else {  /* continue running after recoverable errors */    while (errorstatus(status) && recover(L, status)) { // 检查是否有可恢复点      /* unroll continuation */      status = luaD_rawrunprotected(L, unroll, &status);    }    if (errorstatus(status)) {  /* unrecoverable error? */      L->status = cast_byte(status);  /* mark thread as 'dead' */      seterrorobj(L, status, L->top);  /* push error message */      L->ci->top = L->top;    }    else lua_assert(status == L->status);  /* normal end or yield */  }  L->nny = oldnny;  // 恢复协程的nny值  L->nCcalls--;  lua_assert(L->nCcalls == ((from) ? from->nCcalls : 0));  lua_unlock(L);  return status;}
可以看出协程的pcall处理有些不同,为什么会这样?
这是因为协程可挂起(yield),可恢复上下文(resume),就会有pcall执行过程有协程挂起的情况,所以需要记录还原点,然后从上下文恢复。
所以到这里也可以发现,除了创建的协程,lua state原生的协程是无法被挂起,通常只有在lua代码执行完时才会退出。

参考:http://blog.csdn.net/mycwq/article/details/49256003
1 0