基于统计方法的二字词发掘程序(改进)

来源:互联网 发布:linux系统安装snmp服务 编辑:程序博客网 时间:2024/04/29 23:32

在原程序基础上增加了一定的自学习能力:程序启动时会加载已保存的词表(words.obj),词表中计数大于 1 的二字词在扫描时会被直接识别为词。

 

 

package test.word;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.HashMap;
import java.util.Iterator;

/**
 * Statistical discovery of two-character words in running text.
 *
 * <p>The text is scanned character by character. Adjacent characters of the
 * same kind (both decimal digits or both non-digits) are joined into a
 * two-character candidate and counted. A candidate is accepted as a word,
 * and both of its characters are consumed, when it is already present in the
 * previously saved dictionary with a count above 1, or when it had been
 * counted more than twice earlier in the current run. Otherwise the second
 * character is carried over and may start the next candidate.
 *
 * <p>Instances keep mutable state in two maps and perform no synchronization.
 */
public class Word {

    /** File holding the dictionary produced by earlier runs. */
    private static final String WORDS_FILE = "d:/w/words.obj";

    /** File that receives the counts collected by the current run. */
    private static final String RESULT_FILE = "d:/w/result.obj";

    /**
     * Characters that end a run of text. The published listing had its
     * backslashes turned into forward slashes, which left this literal
     * uncompilable; the escapes (quote, line feed, carriage return) are
     * restored here and '/' itself is kept as a delimiter.
     */
    private static final String DELIMITERS = " 《》,。-{}(()[];/\":!?“”…、,.;!?\n\r";

    /** Dictionary loaded from {@link #WORDS_FILE}; consulted by {@link #exist(String)}. */
    private HashMap<String, Integer> words = new HashMap<String, Integer>();

    /** Occurrence counts collected by {@link #insert(String)} during scanning. */
    private HashMap<String, Integer> result = new HashMap<String, Integer>();

    /**
     * Runs one complete pass: loads the dictionary, scans {@code content},
     * saves the collected counts to the result file and prints them.
     *
     * <p>If the dictionary file does not exist yet, an empty one is written so
     * that later runs find it. Other load failures are reported on standard
     * error and the scan continues with the dictionary currently in memory.
     * Merging the results back into the dictionary is not performed here.
     *
     * @param content the text to analyse; {@code null} is treated as empty
     */
    public void run(String content) {
        try {
            HashMap<String, Integer> loaded = this.load(WORDS_FILE);
            // A file containing a serialized null must not replace the map.
            if (loaded != null) {
                words = loaded;
            }
        } catch (FileNotFoundException e) {
            // Expected on the very first run: create the (empty) dictionary.
            System.err.println("Dictionary not found, creating " + WORDS_FILE + ": " + e.getMessage());
            this.save(words, WORDS_FILE);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }

        scan(content);

        this.save(result, RESULT_FILE);
        this.list(result);
    }

    /**
     * Scans {@code content} and counts single characters and two-character
     * candidates through {@link #insert(String)}.
     *
     * <p>A delimiter (see {@link #split(char)}) and the end of the input both
     * end the current run: a character still waiting for a partner is counted
     * on its own and is never joined with text on the other side.
     *
     * @param content the text to scan; {@code null} is ignored
     */
    public void scan(String content) {
        if (content == null) {
            return;
        }

        // Previous character still waiting for a partner; 0 means "none".
        char pending = 0;

        for (int i = 0; i < content.length(); i++) {
            char current = content.charAt(i);

            if (split(current)) {
                if (pending != 0) {
                    insert(String.valueOf(pending));
                    // Reset so the next character cannot pair across the delimiter
                    // and repeated delimiters do not re-count the same character.
                    pending = 0;
                }
                continue;
            }

            if (pending == 0) {
                pending = current;
                continue;
            }

            if (isNum(pending) != isNum(current)) {
                // Digit/non-digit boundary: the two characters never form a pair.
                insert(String.valueOf(pending));
                pending = current;
                continue;
            }

            String pair = new String(new char[] {pending, current});
            boolean known = exist(pair);
            boolean frequent = insert(pair);
            // An accepted word consumes both characters; otherwise the second
            // character may still begin the next candidate.
            pending = (known || frequent) ? 0 : current;
        }

        // End of input acts like a delimiter for the last pending character.
        if (pending != 0) {
            insert(String.valueOf(pending));
        }
    }

    /**
     * Tells whether {@code c} is one of the ASCII decimal digits.
     *
     * @param c the character to test
     * @return {@code true} for '0' through '9', {@code false} otherwise
     */
    public boolean isNum(char c) {
        return c >= '0' && c <= '9';
    }

    /**
     * Tells whether {@code t} is an established word of the loaded dictionary.
     *
     * @param t the candidate string
     * @return {@code true} if the dictionary holds {@code t} with a count greater than 1
     */
    public boolean exist(String t) {
        Integer count = words.get(t);
        return count != null && count > 1;
    }

    /**
     * Counts one more occurrence of {@code t} in the result map.
     *
     * @param t the string to count
     * @return {@code true} if {@code t} had already been counted more than
     *         twice before this call, {@code false} otherwise
     */
    public boolean insert(String t) {
        Integer seen = result.get(t);
        if (seen == null) {
            result.put(t, 1);
            return false;
        }
        result.put(t, seen + 1);
        return seen > 2;
    }

    /**
     * Tells whether {@code c} is a delimiter (whitespace or punctuation listed
     * in {@link #DELIMITERS}).
     *
     * @param c the character to test
     * @return {@code true} if {@code c} ends a run of text
     */
    public boolean split(char c) {
        return DELIMITERS.indexOf(c) >= 0;
    }

    /**
     * Reads a serialized map from {@code file}.
     *
     * <p>NOTE(review): this uses Java native deserialization, so the file must
     * come from a trusted source.
     *
     * @param file path of the file to read
     * @return the deserialized map (may be {@code null} if the file holds a serialized null)
     * @throws FileNotFoundException if the file cannot be opened
     * @throws IOException if reading fails
     * @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    public HashMap<String, Integer> load(String file)
            throws FileNotFoundException, IOException, ClassNotFoundException {
        FileInputStream raw = new FileInputStream(file);
        try {
            ObjectInputStream in = new ObjectInputStream(raw);
            @SuppressWarnings("unchecked") // the file is written by save() with this map type
            HashMap<String, Integer> map = (HashMap<String, Integer>) in.readObject();
            return map;
        } finally {
            // Closing the underlying stream releases the file even when reading failed.
            raw.close();
        }
    }

    /**
     * Serializes {@code map} to {@code file}. Failures are reported on
     * standard error and are not propagated.
     *
     * @param map the map to write
     * @param file path of the file to (over)write
     */
    public void save(HashMap<?, ?> map, String file) {
        FileOutputStream raw = null;
        try {
            raw = new FileOutputStream(file);
            ObjectOutputStream out = new ObjectOutputStream(raw);
            out.writeObject(map);
            // Push buffered object data to the file before it is closed below.
            out.flush();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (raw != null) {
                try {
                    raw.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Prints every entry of {@code map} to standard output, one
     * "key count" pair per line, in the map's iteration order.
     *
     * @param map the map to print
     */
    public void list(HashMap<?, ?> map) {
        for (Object key : map.keySet()) {
            System.out.println(key + " " + map.get(key));
        }
    }

}

原创粉丝点击