Java weka分类返回分类名称classname

来源:互联网 发布:js报表控件 编辑:程序博客网 时间:2024/05/29 09:32

使用 Eclipse、Java、Weka


1、在weka的安装目录下找到weka.jar和weka-src.jar两个jar包,如下图所示:
这里写图片描述
2、在eclipse中新建Java项目,然后右键build-path,在Libraries标签页里点击添加外部jar包,然后将上一步中找到的weka.jar和weka-src.jar添加进去,然后点击OK,如下图所示:
这里写图片描述
3、在新建的Java项目中新建包wekaTest和类J48Test,将分词后的中文文本添加筛选器StringToWordVector(对文本数据进行预处理),然后使用weka自带的分类算法进行分类,并返回每个实例对应的分类名称,具体实现代码如下:

package wekaTest;

import java.io.File;

import weka.classifiers.Evaluation;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.trees.J48;
import weka.classifiers.trees.RandomForest;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.converters.ArffLoader;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.StringToWordVector;

/**
 * Loads an ARFF data set, runs it through the {@link StringToWordVector} filter,
 * trains RandomForest, J48 and NaiveBayes on the filtered data and prints the
 * accuracy of each model.
 *
 * <p>NOTE: every model is evaluated on the very same instances it was trained
 * on, so the printed accuracies are training-set accuracies, not an estimate
 * of performance on unseen data.
 */
public class J48Test {

    /** ARFF file used when no path is given on the command line. */
    private static final String DEFAULT_ARFF_PATH = "E:\\data.arff";

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the path of the ARFF file to load.
     *             When absent, {@code E:\data.arff} is used.
     * @throws Exception if the file cannot be loaded, or filtering, training or
     *                   evaluation fails. The exception is propagated instead of
     *                   being printed and swallowed, so a failed run ends with a
     *                   non-zero exit status.
     */
    public static void main(String[] args) throws Exception {
        String arffPath = (args != null && args.length > 0) ? args[0] : DEFAULT_ARFF_PATH;

        Instances newIns = toWordVectors(loadArff(new File(arffPath)));

        RandomForest RFmodel = new RandomForest();
        J48 Jmodel = new J48();
        NaiveBayes NBmodel = new NaiveBayes();
        RFmodel.buildClassifier(newIns);
        Jmodel.buildClassifier(newIns);
        NBmodel.buildClassifier(newIns);

        Evaluation testingEvaluationRF = new Evaluation(newIns);
        Evaluation testingEvaluationJ48 = new Evaluation(newIns);
        Evaluation testingEvaluationNB = new Evaluation(newIns);

        int length = newIns.numInstances();
        for (int i = 0; i < length; i++) {
            Instance testInst = newIns.instance(i);
            // To print the predicted class name of an instance instead of only
            // recording the prediction, use:
            //   testInst.classAttribute().value((int) RFmodel.classifyInstance(testInst))
            testingEvaluationRF.evaluateModelOnceAndRecordPrediction(RFmodel, testInst);
            testingEvaluationJ48.evaluateModelOnceAndRecordPrediction(Jmodel, testInst);
            testingEvaluationNB.evaluateModelOnceAndRecordPrediction(NBmodel, testInst);
        }

        System.out.println("RandomForest的正确率:" + (1 - testingEvaluationRF.errorRate()));
        System.out.println("J48的正确率:" + (1 - testingEvaluationJ48.errorRate()));
        System.out.println("NaiveBayes的正确率:" + (1 - testingEvaluationNB.errorRate()));
        // Further reports available on each Evaluation object:
        // toSummaryString(), toClassDetailsString(), toMatrixString() (confusion matrix).
    }

    /**
     * Reads the given ARFF file and marks its last attribute as the class attribute.
     *
     * @param file the ARFF file to read
     * @return the loaded data set with the class index set to the last attribute
     * @throws Exception if the loader cannot read the file
     */
    private static Instances loadArff(File file) throws Exception {
        ArffLoader loader = new ArffLoader();
        loader.setFile(file);
        Instances ins = loader.getDataSet();
        ins.setClassIndex(ins.numAttributes() - 1);
        return ins;
    }

    /**
     * Applies a {@link StringToWordVector} filter, with both the TF and the IDF
     * transform switched on, to the given data set.
     *
     * @param ins the data set to filter; its class index must already be set
     * @return the filtered data set
     * @throws Exception if the filter cannot be configured or applied
     */
    private static Instances toWordVectors(Instances ins) throws Exception {
        StringToWordVector filter = new StringToWordVector();
        filter.setIDFTransform(true);
        filter.setTFTransform(true);
        // The input format must be set after the options, before the filter is used.
        filter.setInputFormat(ins);
        return Filter.useFilter(ins, filter);
    }
}

然后运行查看结果

原创粉丝点击