搜索排序插件语言的选择

来源:互联网 发布:数据挖掘技术及应用 编辑:程序博客网 时间:2024/05/22 04:49

ES(Elasticsearch)支持用 Groovy 定制开发排序脚本,但实际使用时发现性能较差。

动态语言的好处是写代码时不用做类型转换,但使用非 JVM 的脚本语言(JS、Lua)需要把很多对象复制、重构一遍,代价太大。Scala 看起来可以在性能和易用性之间做个折中:虽然可能仍要做类型转换,但不需要声明类型,也不需要写 return,比 Java 稍方便,性能也相当。以下为测试代码:

    private static void testGroovy() {        GroovyScriptEngineService service = new GroovyScriptEngineService();        String groovyScript = "priceLimit = doc['priceLimit'];\n" +                "if (priceLimit == 0.0) { return 1; }\n" +                "score = (doc['parValue'] + C.toFloat()) / priceLimit;\n" +                "if (score < 1) { return score; } else { return 1; }";        ;        CompiledScript script = new CompiledScript(ScriptType.FILE, "groovy", "GROOVY", service.compile(groovyScript, null));        Map<String, Object> params = new HashMap<String, Object>();        Map<String, Object> docMap = new HashMap<String, Object>();        params.put("doc", docMap);        params.put("C", "13");        // warmup        System.err.println("Warming up...");        for (int k = 0; k < 1000; ++k) {            for (int i = 0; i < 30000; ++i) {                params.put("_doc", i);                docMap.put("priceLimit", Math.random() * 10000 - 5000);                docMap.put("parValue", Math.random() * 10000 - 5000);                LeafScoreScript scoreScript = service.executable(script, null, params);                scoreScript.runAsDouble();            }        }        // test        System.err.println("Begin to test...");        long old = System.currentTimeMillis();        for (int k = 0; k < 1000; ++k) {            for (int i = 0; i < 30000; ++i) {                params.put("_doc", i);                docMap.put("priceLimit", Math.random() * 10000 - 5000);                docMap.put("parValue", Math.random() * 10000 - 5000);                LeafScoreScript scoreScript = service.executable(script, null, params);                scoreScript.runAsDouble();            }        }        System.err.println("Groovy Elapsed Ticks: " + (System.currentTimeMillis() - old) + " ms.");    }    private static double calcScore(Object doc, Object C, Object _doc) {        double priceLimit = (double)(((Map<String, Object>)doc).get("priceLimit"));        if (priceLimit == 
0.0) {            return 1;        }        double score = ((double)((Map<String, Object>)doc).get("parValue") + Double.parseDouble(C.toString()) ) / priceLimit;        if (score < 1) {            return score;        } else {            return 1;        }    }    private static void testJava() {        Map<String, Object> params = new HashMap<String, Object>();        Map<String, Object> docMap = new HashMap<String, Object>();        params.put("doc", docMap);        String C = "13";        // warmup        System.err.println("Warming up...");        for (int k = 0; k < 1000; ++k) {            for (int i = 0; i < 30000; ++i) {                Integer _doc = i;                docMap.put("priceLimit", Math.random() * 10000 - 5000);                docMap.put("parValue", Math.random() * 10000 - 5000);                calcScore(docMap, C, _doc);            }        }        // test        System.err.println("Begin to test...");        long old = System.currentTimeMillis();        for (int k = 0; k < 1000; ++k) {            for (int i = 0; i < 30000; ++i) {                Integer _doc = i;                docMap.put("priceLimit", Math.random() * 10000 - 5000);                docMap.put("parValue", Math.random() * 10000 - 5000);                calcScore(docMap, C, _doc);            }        }        System.err.println("Java Elapsed Ticks: " + (System.currentTimeMillis() - old) + " ms.");    }    private static void testScala() throws javax.script.ScriptException {        Map<String, Object> params = new HashMap<String, Object>();        Map<String, Object> docMap = new HashMap<String, Object>();        params.put("doc", docMap);        params.put("C", "13");        ScriptEngine scriptEngine = new ScriptEngineManager().getEngineByName("scala");        ((MutableSettings.BooleanSetting)(((IMain)scriptEngine).settings()                .usejavacp())).value_$eq(true);        scriptEngine.put("_doc", docMap);        scriptEngine.put("_C", "13");        String script = "val doc = 
_doc.asInstanceOf[java.util.Map[String, Object]]\n" +                "val C = _C.toString\n" +                "val priceLimit = doc.get(\"priceLimit\").asInstanceOf[Double]\n" +                "if (priceLimit == 0.0) { 1 }\n" +                "val score = (doc.get(\"parValue\").asInstanceOf[Double] + java.lang.Double.parseDouble(C.toString)) / priceLimit\n" +                "Math.min(score, 1)";        System.err.println(script);        final javax.script.CompiledScript compiled = ((Compilable)scriptEngine).compile(script);        // warmup        System.err.println("Warming up...");        for (int k = 0; k < 1000; ++k) {            for (int i = 0; i < 30000; ++i) {                params.put("_doc", i);                docMap.put("priceLimit", Math.random() * 10000 - 5000);                docMap.put("parValue", Math.random() * 10000 - 5000);                compiled.eval();            }        }        // test        System.err.println("Begin to test...");        long old = System.currentTimeMillis();        for (int k = 0; k < 1000; ++k) {            for (int i = 0; i < 30000; ++i) {                params.put("_doc", i);                docMap.put("priceLimit", Math.random() * 10000 - 5000);                docMap.put("parValue", Math.random() * 10000 - 5000);                compiled.eval();            }        }        System.err.println("Scala Elapsed Ticks: " + (System.currentTimeMillis() - old) + " ms.");    }    public static void main(String[] args) throws javax.script.ScriptException {        testScala();        testJava();        testGroovy();    }

测试结果如下:


Scala script 比原生 Java 函数调用多 20% 的开销(overhead),Groovy 则慢 10 倍。

用scala script engine需要增加依赖:

<dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-library</artifactId>
    <version>2.11.7</version>
</dependency>
<dependency>
    <groupId>org.apache.clerezza.scala</groupId>
    <artifactId>script-engine</artifactId>
    <version>1.0.0</version>
</dependency>


原创粉丝点击