两层查询词扩展描述的简单排序

来源:互联网 发布:误删数据库怎么恢复 编辑:程序博客网 时间:2024/05/29 19:56
package com.alg.qid.segword;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.ToAnalysis;

import com.alg.qid.sent2vec.MapValueComparator;

/**
 * Ranks two levels of query-expansion terms read from a tab-separated corpus file.
 *
 * <p>Each usable input line has five tab-separated columns: a head term, a
 * first-level expansion, a second-level expansion, a fourth column that is not
 * read, and a fifth column holding 13 space-separated numeric fields. From those
 * fields a score is computed for the (head, first-level) pair and another for the
 * (head, first-level, second-level) triple. Two files are written next to the input:
 * <ul>
 *   <li>{@code <input>.6disfilter} - at most the first 10 first-level terms per head
 *       and at most the first 10 second-level terms per first-level term;</li>
 *   <li>{@code <input>.dis10more} - every ranked triple.</li>
 * </ul>
 */
public class QueryFilterDistance {

    /** Number of tab-separated columns a line must have to be processed. */
    private static final int COLUMN_COUNT = 5;

    /** Number of space-separated fields required in the fifth column. */
    private static final int SCORE_FIELD_COUNT = 13;

    /** Maximum rank (inclusive) written to the {@code .6disfilter} file at each level. */
    private static final int TOP_N = 10;

    /**
     * Reads the corpus file, accumulates the scores and writes both ranking files.
     *
     * @param argv command-line arguments; the corpus file path is {@code argv[0]}.
     *             For compatibility with earlier invocations, when two or more
     *             arguments are supplied the path is taken from {@code argv[1]}.
     * @throws IOException if the input cannot be read or an output cannot be created
     * @throws NumberFormatException if a score field that is read is not a valid double
     */
    public static void main(String argv[]) throws IOException {
        if (argv.length < 1) {
            System.err.println("Parameter missing.");
            System.err.println("Usage: AppName CorpusFile");
            return;
        }
        // The original always read argv[1], which throws ArrayIndexOutOfBoundsException
        // for the documented single-argument usage. Keep argv[1] when it exists so
        // existing two-argument invocations behave as before.
        String input = argv.length > 1 ? argv[1] : argv[0];

        PrintWriter pw = null;
        PrintWriter pw1 = null;
        BufferedReader br = null;
        try {
            pw = new PrintWriter(new OutputStreamWriter(
                    new FileOutputStream(input + ".6disfilter"), "utf-8"), true);
            pw1 = new PrintWriter(new OutputStreamWriter(
                    new FileOutputStream(input + ".dis10more"), "utf-8"), true);
            br = new BufferedReader(new InputStreamReader(
                    new FileInputStream(new File(input)), "utf-8"));

            // head -> first-level term -> score
            Map<String, Map<String, Double>> bigram = new HashMap<String, Map<String, Double>>();
            // head -> first-level term -> second-level term -> score
            Map<String, Map<String, Map<String, Double>>> trigram =
                    new HashMap<String, Map<String, Map<String, Double>>>();

            int count = 0;
            String line = null;
            while ((line = br.readLine()) != null) {
                count += 1;
                System.out.println(count);
                addRecord(line, bigram, trigram);
            }

            int num = 0;
            int unicount = 0;

            // Pass 1: top-N first-level terms, and top-N second-level terms under each.
            for (Entry<String, Map<String, Map<String, Double>>> e : trigram.entrySet()) {
                unicount += 1;
                Map<String, Double> sortbigram = SortMapByValue(bigram.get(e.getKey()));
                int bicount = 0;
                for (Entry<String, Double> ee : sortbigram.entrySet()) {
                    bicount += 1;
                    if (bicount > TOP_N) {
                        // Nothing is emitted past the limit, so stop early.
                        break;
                    }
                    Map<String, Double> sorttrigram = SortMapByValue(e.getValue().get(ee.getKey()));
                    int tricount = 0;
                    for (Entry<String, Double> eee : sorttrigram.entrySet()) {
                        tricount += 1;
                        if (tricount > TOP_N) {
                            break;
                        }
                        num += 1;
                        System.out.println("num=" + num + "\t" + "bicount=" + bicount + "\t"
                                + "tricount=" + tricount);
                        String row = formatRow(e.getKey(), ee, eee, unicount, bicount, tricount);
                        pw.println(row);
                        System.out.println(row);
                    }
                }
            }

            // Pass 2: every ranked triple, without a limit.
            // NOTE(review): unicount and num are not reset here, so the head index written
            // to the .dis10more file continues from where pass 1 ended. Preserved as-is;
            // confirm whether that offset is intentional.
            for (Entry<String, Map<String, Map<String, Double>>> e : trigram.entrySet()) {
                unicount += 1;
                Map<String, Double> sortbigram = SortMapByValue(bigram.get(e.getKey()));
                int bicount = 0;
                for (Entry<String, Double> ee : sortbigram.entrySet()) {
                    bicount += 1;
                    Map<String, Double> sorttrigram = SortMapByValue(e.getValue().get(ee.getKey()));
                    int tricount = 0;
                    for (Entry<String, Double> eee : sorttrigram.entrySet()) {
                        tricount += 1;
                        num += 1;
                        System.out.println("num=" + num + "\t" + "bicount=" + bicount + "\t"
                                + "tricount=" + tricount);
                        pw1.println(formatRow(e.getKey(), ee, eee, unicount, bicount, tricount));
                    }
                }
            }
        } finally {
            // Release all three streams even when reading or parsing fails.
            if (pw != null) {
                pw.close();
            }
            if (pw1 != null) {
                pw1.close();
            }
            if (br != null) {
                br.close();
            }
        }
    }

    /**
     * Parses one corpus line and stores its scores in the two accumulators.
     *
     * <p>The line is ignored when it does not have exactly {@link #COLUMN_COUNT} columns,
     * when one of the first three terms contains a later one (column 1 contains column 2
     * or 3, or column 2 contains column 3), or when the fifth column does not have exactly
     * {@link #SCORE_FIELD_COUNT} fields. A later line with the same key overwrites the
     * earlier score.
     *
     * @param line    raw input line
     * @param bigram  head -> first-level term -> score; updated in place
     * @param trigram head -> first-level term -> second-level term -> score; updated in place
     */
    private static void addRecord(String line, Map<String, Map<String, Double>> bigram,
            Map<String, Map<String, Map<String, Double>>> trigram) {
        String[] seg = line.split("\t");
        // Check the column count before touching seg[1]/seg[2]; the original indexed first
        // and crashed on lines with fewer than three columns.
        if (seg.length != COLUMN_COUNT) {
            return;
        }
        if (seg[0].contains(seg[1]) || seg[1].contains(seg[2]) || seg[0].contains(seg[2])) {
            return;
        }
        String[] data = seg[4].split(" ");
        if (data.length != SCORE_FIELD_COUNT) {
            return;
        }

        // bigram
        Double value2 = 0.3 * Double.parseDouble(data[4]) + Double.parseDouble(data[7]);
        Map<String, Double> firstLevel = bigram.get(seg[0]);
        if (firstLevel == null) {
            firstLevel = new HashMap<String, Double>();
            bigram.put(seg[0], firstLevel);
        }
        firstLevel.put(seg[1], value2);

        // trigram
        Double value3 = 0.1 * Double.parseDouble(data[6]) + Double.parseDouble(data[8])
                + Double.parseDouble(data[9]);
        Map<String, Map<String, Double>> byFirstLevel = trigram.get(seg[0]);
        if (byFirstLevel == null) {
            byFirstLevel = new HashMap<String, Map<String, Double>>();
            trigram.put(seg[0], byFirstLevel);
        }
        Map<String, Double> secondLevel = byFirstLevel.get(seg[1]);
        if (secondLevel == null) {
            secondLevel = new HashMap<String, Double>();
            byFirstLevel.put(seg[1], secondLevel);
        }
        secondLevel.put(seg[2], value3);
    }

    /**
     * Builds one output row: the three terms separated by tabs, then a tab, then the three
     * counters and the two scores separated by single spaces.
     */
    private static String formatRow(String head, Entry<String, Double> first,
            Entry<String, Double> second, int unicount, int bicount, int tricount) {
        return head + "\t" + first.getKey() + "\t" + second.getKey() + "\t"
                + unicount + " " + bicount + " " + tricount + " "
                + first.getValue() + " " + second.getValue();
    }

    /**
     * Returns a copy of {@code map} whose iteration order is the order produced by
     * {@code MYMapValueComparator}.
     *
     * <p>NOTE(review): {@code MYMapValueComparator} is not defined or imported in this file;
     * it is presumably a same-package class ordering entries by descending value - confirm.
     *
     * @param map scores keyed by term; may be {@code null} or empty
     * @return a new insertion-ordered map in comparator order; an empty map (never
     *         {@code null}) when {@code map} is {@code null} or empty, so callers can
     *         iterate the result without a null check
     */
    private static Map<String, Double> SortMapByValue(Map<String, Double> map) {
        Map<String, Double> sortedMap = new LinkedHashMap<String, Double>();
        if (map == null || map.isEmpty()) {
            return sortedMap;
        }
        List<Map.Entry<String, Double>> entryList =
                new ArrayList<Map.Entry<String, Double>>(map.entrySet());
        Collections.sort(entryList, new MYMapValueComparator());
        for (Map.Entry<String, Double> entry : entryList) {
            sortedMap.put(entry.getKey(), entry.getValue());
        }
        return sortedMap;
    }
}

实验结果(各列依次为:查询词、一级扩展词、二级扩展词、查询词序号、一级扩展词排名、二级扩展词排名、一级扩展得分、二级扩展得分):

奶茶店 logo vi 1 1 1 -0.4939682812736917 0.6573862087355775
奶茶店 logo 名片 1 1 2 -0.4939682812736917 0.6481132287355775
奶茶店 logo 招牌 1 1 3 -0.4939682812736917 0.5737873487355776
奶茶店 logo 标志 1 1 4 -0.4939682812736917 0.4983242387355775
奶茶店 logo 门头 1 1 5 -0.4939682812736917 0.4893510987355775
奶茶店 logo 设计 1 1 6 -0.4939682812736917 0.45985912873557755
奶茶店 logo 店名 1 1 7 -0.4939682812736917 0.4446407187355775
奶茶店 logo 店面 1 1 8 -0.4939682812736917 0.3900299587355775
奶茶店 logo 包装 1 1 9 -0.4939682812736917 0.3418655687355775
奶茶店 logo 字体 1 1 10 -0.4939682812736917 0.32816370873557754
奶茶店 店面 装修 1 2 1 -0.7673270142985424 0.8558337530771946
奶茶店 店面 效果图 1 2 2 -0.7673270142985424 0.6919179130771945
奶茶店 店面 平面图 1 2 3 -0.7673270142985424 0.5538915830771944
奶茶店 店面 logo 1 2 4 -0.7673270142985424 0.43575150307719446
奶茶店 店面 设计 1 2 5 -0.7673270142985424 0.37641578307719453
奶茶店 店面 形象 1 2 6 -0.7673270142985424 0.26372490307719454
奶茶店 招牌 门头 1 3 1 -0.8218455342985425 1.2669645830771945
奶茶店 招牌 海报 1 3 2 -0.8218455342985425 0.7312208530771945
奶茶店 招牌 logo 1 3 3 -0.8218455342985425 0.6467678230771945
奶茶店 招牌 设计 1 3 4 -0.8218455342985425 0.44883581307719445
奶茶店 招牌 形象 1 3 5 -0.8218455342985425 0.2284081030771945
奶茶店 招牌 优化 1 3 6 -0.8218455342985425 -0.10880770192280549
奶茶店 灯箱 招牌 1 4 1 -0.8318617625630081 1.0741451358320164
奶茶店 灯箱 易拉宝 1 4 2 -0.8318617625630081 0.7619603258320164
奶茶店 灯箱 宣传画 1 4 3 -0.8318617625630081 0.6066572158320165
奶茶店 灯箱 广告 1 4 4 -0.8318617625630081 0.4675840858320164
奶茶店 灯箱 菜单 1 4 5 -0.8318617625630081 0.3331858758320164
奶茶店 灯箱 图片 1 4 6 -0.8318617625630081 0.26625460583201643
奶茶店 灯箱 设计 1 4 7 -0.8318617625630081 0.2487683858320164
奶茶店 灯箱 产品 1 4 8 -0.8318617625630081 -0.010310164167983596
奶茶店 设计 logo 1 5 1 -0.8352661461823814 0.5819150170384741
奶茶店 设计 门头 1 5 2 -0.8352661461823814 0.4373177570384741
奶茶店 设计 制作 1 5 3 -0.8352661461823814 0.4077541370384741
奶茶店 设计 效果图 1 5 4 -0.8352661461823814 0.38852906703847406
奶茶店 设计 标志 1 5 5 -0.8352661461823814 0.34886226703847406
奶茶店 设计 甜品 1 5 6 -0.8352661461823814 0.34813677703847407
奶茶店 设计 海报 1 5 7 -0.8352661461823814 0.3343255470384741
奶茶店 设计 装修 1 5 8 -0.8352661461823814 0.27377736703847405
奶茶店 设计 宣传单 1 5 9 -0.8352661461823814 0.24935150703847409
奶茶店 设计 餐厅 1 5 10 -0.8352661461823814 0.19175159703847408

0 0
原创粉丝点击