使用JavaScript实现一个“字节码解释器”,并用它重新实现JS科学计算器的后端(后续4)

来源:互联网 发布:如何查看软件位数 编辑:程序博客网 时间:2024/05/16 23:47

成功解决了字节码指令生成的问题,掌握一个原则:

1、数值常量expr100一律是MovImm #imm, R0,但把要不要执行Push R0推迟到expr20-60里做判断;
2、约定任何表达式生成的指令执行完毕后,其值都保存在R0里;需要执行Push R0,当且仅当:

     此子表达式出现在二元运算的左侧,且右侧需要递归求值。(注意:如果右侧只是一个数值常量,其实可以直接MovImm #right_imm, R1,无需对左侧的结果执行Push R0;下面的实现为简单起见尚未做这一优化,遇到二元运算时总是先Push R0。)

/**
 * Back end of an infix scientific calculator.
 *
 * Button presses are turned into a token stream (numbers, operator strings,
 * "(" and ")"). A recursive-descent parser then walks that stream and does two
 * things at once: it evaluates the expression directly, and it emits
 * register-machine instructions through an Assembler so a bytecode
 * interpreter can compute the same value.
 *
 * Code-generation convention: after the instructions of any sub-expression
 * have run, its value is in R0. A binary operation therefore emits
 * "Push R0" before parsing its right operand and
 * "Mov R0 R1; Pop R0; CallPrimitiveFunction op" afterwards.
 *
 * Parse failures are reported either as a `[false]` result or by throwing a
 * plain string (the style the original code uses).
 *
 * Relies on names defined elsewhere: `Assembler`, `BytecodeIntercepter`,
 * `assert` and `alert`.
 */
function AdvancedCalculator() {
  // Infix token stream: "(" / ")", Number values, and operator strings.
  this.tokens = [];
  // Index of the next token the parser will read.
  this.tokens_scan_index = 0;
  // Saved scan positions used for one-token look-ahead and backtracking.
  this.saved_tokens_scan_index_stack = [];
  // Characters of the numeric literal currently being typed.
  this.value_buffer = [];
  // Property name (sic) is kept unchanged for backward compatibility.
  this.assember = new Assembler();
}

AdvancedCalculator.prototype = {
  // Operator tokens that are not single punctuation characters.
  SQRT: "Sqrt",
  SIN: "Sin",
  COS: "Cos",
  TAN: "Tan",
  COT: "Cot",
  LOG: "Log", // base 10
  LN: "Ln", // base e
  POW: "Pow", // x^y
  PI: Math.PI, // a numeric constant, not an operator

  /**
   * Maps a unary operator token to the JavaScript function implementing it.
   * @param {string} opToken one of SQRT/SIN/COS/TAN/COT/LOG/LN
   * @returns {function(number): number}
   * @throws {string} when the token is not a known unary operator
   */
  mapUnaryOperator2UnaryFunction: function (opToken) {
    switch (opToken) {
      case this.SQRT:
        return Math.sqrt;
      case this.SIN:
        return Math.sin;
      case this.COS:
        return Math.cos;
      case this.TAN:
        return Math.tan;
      case this.COT:
        return function (a) { return 1 / Math.tan(a); };
      case this.LOG:
        return Math.log10;
      case this.LN:
        return Math.log;
      default:
        throw "未识别的一元运算符: " + opToken;
    }
  },

  /**
   * Maps a binary operator token to the JavaScript function implementing it.
   * @param {string} opToken POW, "+", "-", "*" or "/"
   * @returns {function(number, number): number}
   * @throws {string} when the token is not a known binary operator
   */
  mapBinaryOperator2BinaryFunction: function (opToken) {
    switch (opToken) {
      case this.POW:
        return Math.pow;
      case "+":
        return function (a, b) { return a + b; };
      case "-":
        return function (a, b) { return a - b; };
      case "*":
        return function (a, b) { return a * b; };
      case "/":
        return function (a, b) { return a / b; };
      default:
        throw "未识别的二元运算符: " + opToken;
    }
  },

  /**
   * Consumes and returns the next token, or null when the stream is exhausted.
   */
  nextToken: function () {
    if (this.hasMoreTokens()) {
      return this.tokens[this.tokens_scan_index++];
    }
    return null;
  },

  /** @returns {boolean} true while unread tokens remain. */
  hasMoreTokens: function () {
    return this.tokens_scan_index >= 0 && this.tokens_scan_index < this.tokens.length;
  },

  /**
   * Saves the current scan position so it can be restored later.
   * @returns {number} index of the saved entry in the save stack
   */
  pushTokenScanIndex: function () {
    assert(this.tokens_scan_index >= 0 && this.tokens_scan_index < this.tokens.length);
    this.saved_tokens_scan_index_stack.push(this.tokens_scan_index);
    return this.saved_tokens_scan_index_stack.length - 1;
  },

  /**
   * Unwinds the save stack down to `stack_index` and restores the scan
   * position that was stored in that entry.
   */
  popTokenScanIndexAt: function (stack_index) {
    assert(stack_index >= 0 && stack_index < this.saved_tokens_scan_index_stack.length);
    while (this.saved_tokens_scan_index_stack.length > stack_index) {
      this.tokens_scan_index = this.saved_tokens_scan_index_stack.pop();
    }
  },

  /** Restores the most recently saved scan position (backtrack). */
  popTokenScanIndex: function () {
    this.tokens_scan_index = this.saved_tokens_scan_index_stack.pop();
  },

  /** Drops the most recently saved scan position without restoring it (commit). */
  discardLastTokenScanIndex: function () {
    this.saved_tokens_scan_index_stack.pop();
  },

  /**
   * @returns {boolean} true for prefix function tokens.
   * SQRT is included: it is mapped by mapUnaryOperator2UnaryFunction, and
   * leaving it out made a Sqrt button end up in the digit buffer.
   */
  isUnaryOperator: function (token) {
    return token === this.SQRT || token === this.SIN || token === this.COS ||
      token === this.TAN || token === this.COT || token === this.LOG ||
      token === this.LN;
  },

  /** @returns {boolean} true for binary functions written infix (currently only POW). */
  isBinaryFunctionToken: function (token) {
    return token === this.POW;
  },

  /** @returns {boolean} true for every binary infix operator, POW included. */
  isBinaryOperator: function (token) {
    return token === "+" || token === "-" || token === "*" || token === "/" ||
      this.isBinaryFunctionToken(token);
  },

  /** @returns {number} 1 for a unary operator, 2 for a binary one, 0 otherwise. */
  isOperator: function (token) {
    if (this.isUnaryOperator(token)) {
      return 1;
    }
    if (this.isBinaryOperator(token)) {
      return 2;
    }
    return 0;
  },

  /** Appends a token to the parser input stream. */
  emitToken: function (token) {
    this.tokens.push(token);
  },

  /** Flushes buffered digit characters, if any, into the stream as one Number token. */
  emitValueTokenIfAny: function () {
    if (this.value_buffer.length > 0) {
      this.emitToken(Number(this.value_buffer.join('')));
      this.value_buffer = [];
    }
  },

  /**
   * Feeds one calculator button. Parentheses and operators first flush the
   * pending number and then become tokens themselves; anything else
   * (digits, ".") is buffered as part of the current number.
   */
  emitButton: function (btn) {
    if (btn === "(" || btn === ")" || this.isOperator(btn)) {
      this.emitValueTokenIfAny();
      this.emitToken(btn);
    } else {
      this.value_buffer.push(btn);
    }
  },

  /** Feeds a sequence of buttons in order. */
  emitButtons: function (btns) {
    for (var i = 0; i < btns.length; ++i) {
      this.emitButton(btns[i]);
    }
  },

  /** Moves every item of `source` to the end of `target`, leaving `source` empty. */
  concat: function (target, source) {
    while (source.length > 0) {
      target.push(source.shift());
    }
  },

  /**
   * Emits the instruction tail shared by all binary operations. The right
   * operand is in R0 and the left operand was pushed earlier, so: move the
   * right operand to R1, pop the left one back into R0, call the primitive.
   */
  emitBinaryCall: function (opToken) {
    this.assember.emitInstruction({type: "Mov", arg: "R0", arg1: "R1"});
    this.assember.emitInstruction({type: "Pop", arg: "R0"});
    this.assember.emitInstruction({type: "CallPrimitiveFunction", arg: opToken});
  },

  /**
   * Parses and evaluates a whole expression from the current scan position.
   * @returns {Array} `[true, value]` on success, `[false]` on failure
   */
  evalExpr: function () {
    return this.evalExpr20();
  },

  /**
   * expr100 := NUMBER
   * Emits "MovImm value, R0". Whether that value must also be pushed is
   * decided by the enclosing binary operation.
   * A non-number token is restored and reported as `[false]`, so the caller
   * raises its own syntax error (the original asserted here, which left this
   * fallback unreachable).
   */
  evalExpr100: function () {
    if (!this.hasMoreTokens()) {
      return [false];
    }
    this.pushTokenScanIndex();
    var token = this.nextToken();
    if (token == null) {
      throw "流非正常结束,此处应有一数值value!";
    }
    if (typeof token === "number") {
      this.assember.emitInstruction({type: "MovImm", arg: token, arg1: "R0"});
      this.discardLastTokenScanIndex();
      return [true, token];
    }
    this.popTokenScanIndex();
    return [false];
  },

  /**
   * expr80 := '(' expr ')' | expr100
   * Parentheses only regroup, so no extra instructions are emitted for them.
   */
  evalExpr80: function () {
    if (!this.hasMoreTokens()) {
      return [false];
    }
    this.pushTokenScanIndex();
    var token = this.nextToken();
    if (token == null) {
      this.discardLastTokenScanIndex();
      return [false];
    }
    if (token === "(") {
      var inner = this.evalExpr();
      if (!inner[0]) {
        throw "TODO: fixme";
      }
      // A missing or wrong closing token is a user syntax error, not an
      // internal invariant, so it is thrown like the other parse errors.
      if (this.nextToken() !== ")") {
        throw "流异常结束:expect a )";
      }
      this.discardLastTokenScanIndex();
      return inner;
    }
    this.popTokenScanIndex();
    return this.evalExpr100();
  },

  /**
   * expr60 := UNARY expr80 | expr80
   * The operand's value is already in R0, so a unary function needs only
   * the call instruction. Nested prefixes such as "Sin Sin 1" are not
   * supported (the operand must be an expr80).
   */
  evalExpr60: function () {
    if (!this.hasMoreTokens()) {
      return [false];
    }
    this.pushTokenScanIndex();
    var token = this.nextToken();
    if (token == null) {
      throw "流异常结束:期望一个Expr60";
    }
    if (this.isUnaryOperator(token)) {
      var operand = this.evalExpr80();
      if (!operand[0]) {
        throw "非法表达式!";
      }
      var unaryFunc = this.mapUnaryOperator2UnaryFunction(token);
      this.assember.emitInstruction({type: "CallPrimitiveFunction", arg: token});
      this.discardLastTokenScanIndex();
      return [true, unaryFunc(operand[1])];
    }
    this.popTokenScanIndex();
    return this.evalExpr80();
  },

  /**
   * expr50 := expr60 POW expr50 | expr60
   * Right-associative through recursion. The recursive call consumes every
   * following POW itself, so one look-ahead is enough here; looping on the
   * stale operator token broke inputs such as "2 Pow 3 + 1".
   */
  evalExpr50: function () {
    if (!this.hasMoreTokens()) {
      return [false];
    }
    var left = this.evalExpr60();
    if (!left[0]) {
      return [false];
    }
    var value = left[1];
    if (!this.hasMoreTokens()) {
      return [true, value];
    }
    this.pushTokenScanIndex();
    var op = this.nextToken();
    if (!this.isBinaryFunctionToken(op)) {
      this.popTokenScanIndex(); // un-read the look-ahead token
      return [true, value];
    }
    // Keep the left value on the stack while the right operand is computed.
    this.assember.emitInstruction({type: "Push", arg: "R0"});
    var right = this.evalExpr50();
    if (!right[0]) {
      // Operator matched but no right operand follows.
      throw "Input Invalid";
    }
    value = this.mapBinaryOperator2BinaryFunction(op)(value, right[1]);
    this.emitBinaryCall(op);
    this.discardLastTokenScanIndex();
    return [true, value];
  },

  /**
   * expr40 := expr50 ( ('*' | '/') expr50 )*
   * Left-associative, implemented as a loop.
   * @throws {string} when no operand can be parsed at the current position
   */
  evalExpr40: function () {
    if (!this.hasMoreTokens()) {
      return [false];
    }
    this.pushTokenScanIndex();
    var left = this.evalExpr50();
    if (!left[0]) {
      this.popTokenScanIndex();
      throw "Invalid Input: expect expr40 here";
    }
    this.discardLastTokenScanIndex();
    var value = left[1];
    while (this.hasMoreTokens()) {
      this.pushTokenScanIndex();
      var op = this.nextToken();
      if (op !== "*" && op !== "/") {
        this.popTokenScanIndex(); // un-read the look-ahead token
        break;
      }
      this.assember.emitInstruction({type: "Push", arg: "R0"});
      var right = this.evalExpr50();
      if (!right[0]) {
        throw "Input Invalid";
      }
      if (op === "*") {
        value *= right[1];
      } else {
        value /= right[1];
      }
      this.emitBinaryCall(op);
      this.discardLastTokenScanIndex();
    }
    return [true, value];
  },

  /**
   * expr20 := expr40 ( ('+' | '-') expr40 )*
   * Left-associative, implemented as a loop. Returns `[false]` when an
   * operator is not followed by an operand.
   */
  evalExpr20: function () {
    if (!this.hasMoreTokens()) {
      return [false];
    }
    var left = this.evalExpr40();
    if (!left[0]) {
      return [false];
    }
    var value = left[1];
    while (this.hasMoreTokens()) {
      this.pushTokenScanIndex();
      var op = this.nextToken();
      if (op !== "+" && op !== "-") {
        this.popTokenScanIndex(); // un-read the look-ahead token
        break;
      }
      this.assember.emitInstruction({type: "Push", arg: "R0"});
      var right = this.evalExpr40();
      if (!right[0]) {
        this.discardLastTokenScanIndex();
        return [false];
      }
      if (op === "+") {
        value += right[1];
      } else {
        value -= right[1];
      }
      this.emitBinaryCall(op);
      this.discardLastTokenScanIndex();
    }
    return [true, value];
  },

  /**
   * Flushes the pending number, parses the token stream, shows the generated
   * bytecode and both evaluation results via alert(), and returns the
   * directly evaluated value (undefined when parsing failed).
   */
  calc: function () {
    this.emitValueTokenIfAny();
    var result = this.evalExpr(); // [success, value]
    alert(this.assember.toString());
    var intercepter = new BytecodeIntercepter();
    var interceptEvalResult = intercepter.eval(this.assember.getResult());
    alert("字节码解释器求值结果=" + interceptEvalResult + " \r\n直接递归下降解释执行结果=" + result);
    return result[1];
  }
};

parser的代码目前同时做2件事情:(1)沿用旧逻辑,在递归下降解析过程中直接求值;(2)新增逻辑,通过Assembler生成字节码指令。


测试代码:

// Test case 7: sin(1+2) + cos(3-4) - tan(5*6).
// Feeds the expression button by button, evaluates it, and compares the
// result with the same expression computed directly by JavaScript.
alert("7: sin(1+2)+cos(3-4)-tan(5*6)");
var ac = new AdvancedCalculator();
ac.emitButtons([
  ac.SIN, "(", "1", "+", "2", ")",
  "+",
  ac.COS, "(", "3", "-", "4", ")",
  "-",
  ac.TAN, "(", "5", "*", "6", ")"
]);
var result = ac.calc();
assertEquals(
  result,
  Math.sin(1 + 2) + Math.cos(3 - 4) - Math.tan(5 * 6)
);

成功输出:

MovImm 1 R0
Push R0
MovImm 2 R0
Mov R0 R1
Pop R0
CallPrimitiveFunction +
CallPrimitiveFunction Sin
Push R0
MovImm 3 R0
Push R0
MovImm 4 R0
Mov R0 R1
Pop R0
CallPrimitiveFunction -
CallPrimitiveFunction Cos
Mov R0 R1
Pop R0
CallPrimitiveFunction +
Push R0
MovImm 5 R0
Push R0
MovImm 6 R0
Mov R0 R1
Pop R0
CallPrimitiveFunction *
CallPrimitiveFunction Tan
Mov R0 R1
Pop R0
CallPrimitiveFunction -

字节码解释器求值结果=7.086753510574282
直接递归下降解释执行结果=true,7.086753510574282


下一步工作:编写一个可视化界面?将JS代码格式化一下,然后变量命名再重构一下?加上AST生成和转换成JS运算表达式的支持?


0 0
原创粉丝点击