基于统计方法的二字词发掘程序
来源:互联网 发布:8080端口无法访问 编辑:程序博客网 时间:2024/04/29 18:49
package test;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.HashMap;
/**
 * Small exploration utility that prints ranges of characters and can persist
 * a map of them with Java serialization.
 */
public class Letter {
    /** First char code visited by {@link #run()} (19968 == 0x4E00). */
    private static final int FIRST_CODE = 19968;

    /** Exclusive upper bound of the range visited by {@link #run()} (40870 == 0x9FA5 + 1). */
    private static final int END_CODE = 40870;

    /** Every character visited by {@link #run()}, mapped to itself. */
    private HashMap<Character, Character> letter = new HashMap<Character, Character>();

    /**
     * Prints every char code in [19968, 40870), records each one in the
     * internal map and then serializes that map to {@code d:/letter.obj}.
     *
     * <p>A line break is emitted whenever the numeric code is a multiple of 255.
     */
    public void run() {
        for (int code = FIRST_CODE; code < END_CODE; code++) {
            char c = (char) code;
            System.out.print(c + " ");
            letter.put(c, c);
            if (code % 255 == 0) {
                System.out.println();
            }
        }
        this.save(letter, "d:/letter.obj");
    }

    /**
     * Prints every two-byte sequence whose bytes are both in 0..254, one row
     * of 255 sequences per leading byte.
     *
     * <p>The counters are {@code int}s narrowed to {@code byte} on store: a
     * {@code byte} counter can never reach 255, so the previous {@code byte}
     * loops wrapped around and never terminated. The pair is decoded with
     * {@code new String(byte[])}; {@code String.valueOf(byte[])} resolves to
     * the {@code Object} overload and printed the array identity instead.
     *
     * <p>NOTE(review): decoding uses the platform default charset. Presumably
     * the author ran this on a machine whose default was a double-byte Chinese
     * encoding -- confirm before pinning a charset here.
     */
    public void run1() {
        byte[] pair = new byte[2];
        for (int first = 0; first < 255; first++) {
            for (int second = 0; second < 255; second++) {
                pair[0] = (byte) first;
                pair[1] = (byte) second;
                System.out.print(new String(pair) + " ");
            }
            System.out.println();
        }
    }

    /**
     * Serializes {@code map} to {@code file}, replacing any existing content.
     *
     * <p>I/O failures are reported with a stack trace on standard error and
     * are not propagated. The file handle is released even when writing fails.
     *
     * @param map  the map to write
     * @param file path of the destination file
     */
    public void save(HashMap<?, ?> map, String file) {
        FileOutputStream raw = null;
        try {
            raw = new FileOutputStream(file);
            ObjectOutputStream out = new ObjectOutputStream(raw);
            out.writeObject(map);
            // flush() pushes the object stream's buffer into raw; closing raw below then suffices.
            out.flush();
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so it is covered here as well.
            e.printStackTrace();
        } finally {
            if (raw != null) {
                try {
                    raw.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
package test;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
/**
 * Tallies adjacent character pairs in a text, keeping the seen characters,
 * sentences and pair counts in maps that are persisted between runs with
 * Java serialization.
 */
public class Test {
    /** Characters that end the current sentence and break a character pair. */
    private static final String SEPARATORS = " ,。;!?“”…、,.;!?";

    /** Number of frequency buckets allocated up front. */
    private static final int INITIAL_BUCKETS = 500;

    /** Every character seen by {@link #scan(String)}, mapped to itself. */
    private HashMap<Character, Character> zi = new HashMap<Character, Character>();

    /** Loaded from and saved to {@code ci.obj}; not otherwise modified in this class. */
    private HashMap<Object, Object> ci = new HashMap<Object, Object>();

    /** Every separator-delimited sentence seen, mapped to itself. */
    private HashMap<String, String> ju = new HashMap<String, String>();

    /** Two-character string to the number of times it was seen. */
    private HashMap<String, Integer> count = new HashMap<String, Integer>();

    /** {@code sort[k]} holds the space-joined pairs that were seen exactly {@code k} times. */
    private String[] sort = new String[INITIAL_BUCKETS];

    /** Not used by any method of this class. */
    private List<String> result = new ArrayList<String>();

    /**
     * Echoes {@code content}, loads the persisted maps from the working
     * directory, scans the text, writes the maps back and prints the pair
     * counts followed by the counts grouped by frequency.
     *
     * @param content the text to analyse
     */
    public void run(String content) {
        System.out.println(content);
        zi = this.load("zi.obj");
        ci = this.load("ci.obj");
        ju = this.load("ju.obj");
        count = this.load("count.obj");
        scan(content);
        this.save(zi, "zi.obj");
        this.save(ci, "ci.obj");
        this.save(ju, "ju.obj");
        this.save(count, "count.obj");
        // Start from empty buckets so a second run() on the same instance does not append duplicates.
        sort = new String[INITIAL_BUCKETS];
        this.list(count);
        // The original literal contained "/n"; line breaks were intended.
        System.out.println("\n\n\n\n\n***\n\n\n\n\n");
        this.lists(sort);
    }

    /**
     * Walks {@code content} once: records every character, cuts sentences at
     * separator characters and increments the tally of every pair of adjacent
     * non-separator characters.
     *
     * <p>A trailing sentence that is not followed by a separator is recorded
     * as well. A {@code null} content is ignored.
     *
     * @param content the text to scan
     */
    public void scan(String content) {
        if (content == null) {
            return;
        }
        StringBuilder sentence = new StringBuilder();
        char previous = 0; // 0 means "no previous character in the current sentence"
        for (int i = 0; i < content.length(); i++) {
            char current = content.charAt(i);
            zi.put(current, current);
            if (split(current)) {
                flushSentence(sentence);
                previous = 0;
                continue;
            }
            sentence.append(current);
            if (previous != 0) {
                String pair = new StringBuilder(2).append(previous).append(current).toString();
                Integer seen = count.get(pair);
                count.put(pair, seen == null ? 1 : seen + 1);
            }
            previous = current;
        }
        if (sentence.length() > 0) {
            flushSentence(sentence);
        }
    }

    /** Prints the buffered sentence after a blank line, records it and empties the buffer. */
    private void flushSentence(StringBuilder sentence) {
        String text = sentence.toString();
        System.out.println();
        System.out.println(text);
        ju.put(text, text);
        sentence.setLength(0);
    }

    /**
     * Tells whether {@code c} is one of the separator characters.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} ends a sentence
     */
    public boolean split(char c) {
        return SEPARATORS.indexOf(c) >= 0;
    }

    /**
     * Reads a serialized map from {@code file}.
     *
     * <p>Never returns {@code null}: when the file is missing or cannot be
     * read an empty map is returned, so a first run without any data files
     * works. Read failures other than a missing file are reported with a
     * stack trace.
     *
     * <p>NOTE(review): this uses Java native deserialization; only point it
     * at files written by {@link #save(HashMap, String)}, never at untrusted
     * input.
     *
     * @param file path of the file to read
     * @return the stored map, or a new empty map
     */
    @SuppressWarnings("unchecked") // the stream carries no generic type information
    public <K, V> HashMap<K, V> load(String file) {
        FileInputStream raw = null;
        try {
            raw = new FileInputStream(file);
            ObjectInputStream in = new ObjectInputStream(raw);
            HashMap<K, V> stored = (HashMap<K, V>) in.readObject();
            if (stored != null) {
                return stored;
            }
        } catch (FileNotFoundException e) {
            System.err.println("Cannot open " + file + " (" + e.getMessage() + "); starting with an empty map");
        } catch (IOException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } finally {
            if (raw != null) {
                try {
                    raw.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return new HashMap<K, V>();
    }

    /**
     * Serializes {@code map} to {@code file}, replacing any existing content.
     *
     * <p>I/O failures are reported with a stack trace on standard error and
     * are not propagated. The file handle is released even when writing fails.
     *
     * @param map  the map to write
     * @param file path of the destination file
     */
    public void save(HashMap<?, ?> map, String file) {
        FileOutputStream raw = null;
        try {
            raw = new FileOutputStream(file);
            ObjectOutputStream out = new ObjectOutputStream(raw);
            out.writeObject(map);
            // flush() pushes the object stream's buffer into raw; closing raw below then suffices.
            out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (raw != null) {
                try {
                    raw.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Prints every entry of {@code map} as {@code key count} and appends the
     * key to the bucket for its count.
     *
     * <p>Counts are read from {@code map} itself. The bucket array grows when
     * a count does not fit, so large counts no longer fail.
     *
     * @param map string keys mapped to {@code Integer} counts
     */
    public void list(HashMap<?, ?> map) {
        Iterator<?> keys = map.keySet().iterator();
        while (keys.hasNext()) {
            String key = (String) keys.next();
            int times = (Integer) map.get(key);
            System.out.println(key + " " + times);
            if (times >= sort.length) {
                String[] grown = new String[times + 1];
                System.arraycopy(sort, 0, grown, 0, sort.length);
                sort = grown;
            }
            String bucket = sort[times];
            sort[times] = (bucket == null) ? key : bucket + " " + key;
        }
    }

    /**
     * Prints every element of {@code sort} as {@code index: value}, including
     * the last one.
     *
     * @param sort the buckets to print
     */
    public void lists(String[] sort) {
        for (int i = 0; i < sort.length; i++) {
            System.out.println(i + ": " + sort[i]);
        }
    }
}
package test;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.HashMap;
/**
 * Writes every ordered pair of characters from a code range to a series of
 * numbered text files.
 */
public class Words {
    /** First char code used by {@link #run()} (19968 == 0x4E00). */
    private static final int FIRST_CODE = 19968;

    /** Exclusive upper bound used by {@link #run()} (40870 == 0x9FA5 + 1). */
    private static final int END_CODE = 40870;

    /** A line break is written after every this many entries. */
    private static final int ENTRIES_PER_LINE = 255;

    /** {@link #run()} starts a new output file after this many entries. */
    private static final long ENTRIES_PER_FILE = 255L * 255 * 16 * 4;

    /** Not populated by {@link #run()}; kept so the map can be passed to {@link #save(HashMap, String)}. */
    private HashMap<String, String> words = new HashMap<String, String>();

    /**
     * Writes all pairs of char codes in [19968, 40870) to
     * {@code d:/w/word1.txt}, {@code d:/w/word2.txt}, ... starting a new file
     * every 255 * 255 * 16 * 4 entries.
     *
     * @throws IOException if an output file cannot be created or written
     */
    public void run() throws IOException {
        run(FIRST_CODE, END_CODE, "d:/w/word", ENTRIES_PER_FILE);
    }

    /**
     * Writes every ordered pair of char codes in {@code [firstCode, endCode)}
     * as {@code "<n>: <i> <j> <pair> 0 "}, where {@code n} counts entries from
     * 1. A line break follows every 255th entry, and after every
     * {@code entriesPerFile} entries the current file is closed and the next
     * numbered file is opened.
     *
     * <p>Output files are named {@code filePrefix + fileNumber + ".txt"} with
     * numbering starting at 1. The open writer is always closed, also when a
     * write fails part-way.
     *
     * <p>NOTE(review): text is written through {@code FileWriter}, i.e. in
     * the platform default charset -- confirm that is the intended encoding.
     *
     * @param firstCode      first char code of the range, inclusive
     * @param endCode        end of the range, exclusive
     * @param filePrefix     path prefix of the output files
     * @param entriesPerFile number of entries after which a new file is started; must be positive
     * @throws IOException              if an output file cannot be created or written
     * @throws IllegalArgumentException if {@code entriesPerFile} is not positive
     */
    public void run(int firstCode, int endCode, String filePrefix, long entriesPerFile) throws IOException {
        if (entriesPerFile <= 0) {
            throw new IllegalArgumentException("entriesPerFile must be positive: " + entriesPerFile);
        }
        long written = 0;
        int fileNumber = 1;
        BufferedWriter writer = new BufferedWriter(new FileWriter(filePrefix + fileNumber + ".txt"));
        try {
            for (int i = firstCode; i < endCode; i++) {
                // The leading character only changes with the outer loop.
                String lead = String.valueOf((char) i);
                for (int j = firstCode; j < endCode; j++) {
                    String pair = lead + (char) j;
                    written++;
                    writer.write(written + ": " + i + " " + j + " " + pair + " 0 ");
                    if (written % ENTRIES_PER_LINE == 0) {
                        // The original wrote the two characters "/n"; a line break was intended.
                        writer.write("\n");
                    }
                    if (written % entriesPerFile == 0) {
                        writer.close();
                        fileNumber++;
                        writer = new BufferedWriter(new FileWriter(filePrefix + fileNumber + ".txt"));
                    }
                }
            }
        } finally {
            // Closing an already closed BufferedWriter has no effect, so this is safe after a failed rotation.
            writer.close();
        }
    }

    /**
     * Serializes {@code map} to {@code file}, replacing any existing content.
     *
     * <p>I/O failures are reported with a stack trace on standard error and
     * are not propagated. The file handle is released even when writing fails.
     *
     * @param map  the map to write
     * @param file path of the destination file
     */
    public void save(HashMap<?, ?> map, String file) {
        FileOutputStream raw = null;
        try {
            raw = new FileOutputStream(file);
            ObjectOutputStream out = new ObjectOutputStream(raw);
            out.writeObject(map);
            // flush() pushes the object stream's buffer into raw; closing raw below then suffices.
            out.flush();
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so it is covered here as well.
            e.printStackTrace();
        } finally {
            if (raw != null) {
                try {
                    raw.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
- 基于统计方法的二字词发掘程序
- 基于统计方法的二字词发掘程序(改进)
- 字词出现频率统计
- javascript实现邮件中提取关键字词的方法
- 文言字词的理解
- 基于C++的字符统计程序
- 基于图像的人数统计方法
- 如何在面试中发掘程序猿的核心竞争力
- 基于规则和基于统计的区别、方法简介
- 侦测程序句柄泄露的统计方法
- 侦测程序句柄泄露的统计方法
- 侦测程序句柄泄露的统计方法
- 侦测程序句柄泄露的统计方法
- c++统计程序运行时间的方法
- 侦测程序句柄泄露的统计方法
- 基于c++的统计输入字符串中单词个数程序
- 基于UDP 的QQ聊天程序 二
- 发的发掘手机卡
- 可爱的Python
- Stream复制
- 忙碌儿童综合症 hurried child syndrome
- test
- JAVA实现DES加密和解密软件
- 基于统计方法的二字词发掘程序
- asp.net实现RSS订阅
- c#导出EXCEL
- 安全警戒线 security cordon
- ”Undefined reference to“ 的处理
- Throw someone a bone 表面的恩赐
- [英语阅读]蝙蝠侠当选史上最伟大超级英雄
- C#导出Excel汇总
- WEB中基于XMLHTTP的简单实例分析