一个关键字标红的通用类
来源:互联网 发布:淘宝宝贝手机排名查询 编辑:程序博客网 时间:2024/05/01 15:22
- import java.lang.reflect.Array;
- import java.util.Arrays;
- import java.util.HashSet;
- import java.util.Iterator;
- import java.util.Set;
-
-
-
-
-
-
-
-
-
-
-
-
- public class TagWord {
- private String tagBegin;
- private String tagEnd;
- Branch frontbegin = null;
- Set<String> keyWords = new HashSet<String>();
-
- public TagWord(String begin, String end) {
- this.tagBegin = begin;
- this.tagEnd = end;
- }
-
- public TagWord addKeyWords(String[] keyWord) {
- if (keyWord.length > 0) {
- for (int i = 0; i < keyWord.length; i++) {
- this.keyWords.add(keyWord[i].trim());
- }
- }
- return this;
- }
-
-
- boolean findWord = false;
-
- public String getTagContent(String content) {
- if (content == null || content.trim().length() == 0
- || keyWords.size() == 0) {
- return content;
- }
- this.frontbegin = new MakeLibrary().getStringTree(this.keyWords);
- if(frontbegin==null){
- return content ;
- }
- char[] chars = content.toCharArray();
-
- StringBuilder sb = new StringBuilder();
-
- WoodInterface head = this.frontbegin;
- int start = 0;
- int end = 1;
- int index = 0;
- boolean isBack = false ;
- int length = chars.length ;
-
- for (int i = 0; i < length; i++) {
- index++ ;
- head = head.get(chars[i]) ;
- if(head==null){
- if(isBack){
- sb.append(tagBegin).append(chars,start,end).append(tagEnd) ;
- start = start+end ;
- i = start-1 ;
- isBack = false ;
- }else{
- sb.append(chars,start,end) ;
- i = start ;
- start++ ;
- }
- head = this.frontbegin ;
- index = 0 ;
- end = 1 ;
- continue ;
- }
- switch (head.getStatus()) {
- case 1:
- break ;
- case 2:
- end = index ;
- isBack = true ;
- break ;
- case 3:
- sb.append(tagBegin).append(chars,start,index).append(tagEnd) ;
- start = start+index ;
- index= 0 ;
- end = 1 ;
- isBack = false ;
- head = this.frontbegin;
- break ;
- }
- }
-
- return sb.toString();
- }
-
-
-
- public static void main(String[] args) {
- String[] keyWords = {"中华人民共和国","孙健","伟大","人民", "中华","万岁" };
- long start = System.currentTimeMillis();
- for (int i = 0; i < 1; i++) {
- String str = new TagWord("<begin>", "<end>").addKeyWords(keyWords)
- .getTagContent(
- "中华人民共和国是一个伟大的民族我们有振兴民族的需要孙健万岁 . 中 国 万万岁哈哈 。");
- System.out.println(str);
- }
- System.out.println(System.currentTimeMillis() - start);
- }
-
- }
-
- class MakeLibrary {
-
- public MakeLibrary() {
- }
-
-
- private static boolean hasNext = true;
-
- private static boolean isWords = true;
-
-
- Iterator<String> it = null;
-
- public Branch getStringTree(Set<String> keyWords) {
- it = keyWords.iterator();
- Branch head = new Branch('h', 0, 0);
- Branch branch = head ;
-
- while (it.hasNext()) {
- char[] chars = it.next().toCharArray();
- for (int i = 0; i < chars.length; i++) {
- if (chars.length == (i + 1)) {
- isWords = true;
- hasNext = false;
- } else {
- isWords = false;
- hasNext = true;
- }
- int status = 1;
- if (isWords && hasNext) {
- status = 2;
- }
-
- if (!isWords && hasNext) {
- status = 1;
- }
-
- if (isWords && !hasNext) {
- status = 3;
- }
- branch.add(new Branch(chars[i], status, 0));
- branch = (Branch) branch.get(chars[i]);
- }
- branch = head ;
- }
- return head;
- }
- }
/**
 * A node of the character trie used for keyword lookup.
 *
 * <p>Status values as used by the code in this file: 1 marks a character inside
 * a keyword, 2 marks the end of a keyword that is also a prefix of a longer
 * keyword, and 3 marks the end of a keyword with no longer continuation.
 */
interface WoodInterface {

    /** Adds {@code branch} as a child of this node and returns the child node stored for its character. */
    WoodInterface add(WoodInterface branch);

    /** Returns the child node for character {@code c}, or {@code null} if there is none. */
    WoodInterface get(char c);

    /** Tells whether this node has a child for character {@code c}. */
    boolean contains(char c);

    /** Compares this node's character with {@code c}: negative, zero or positive. */
    int compareTo(char c);

    /** Tells whether this node's character is {@code c}. */
    boolean equals(char c);

    /** Returns the status code of this node. */
    byte getStatus();

    /** Returns the character this node stands for. */
    char getC();

    /** Replaces the status code of this node. */
    void setStatus(int status);

    /** Returns the nature tag attached to this node. */
    byte getNature();

    /** Replaces the nature tag attached to this node. */
    void setNature(byte nature);
}
-
-
- class Branch implements WoodInterface {
-
-
-
-
-
- WoodInterface[] branches = new WoodInterface[0];
- private char c;
-
- private byte status = 1;
-
- private short index = -1;
-
- private byte nature = 0;
-
- WoodInterface branch = null;
-
- public WoodInterface add(WoodInterface branch) {
- if ((this.branch=this.get(branch.getC()))!=null) {
- switch (branch.getStatus()) {
- case 1:
- if(this.branch.getStatus()==2){
- this.branch.setStatus(2) ;
- }
- if(this.branch.getStatus()==3){
- this.branch.setStatus(2) ;
- }
- break;
- case 2:
- this.branch.setStatus(2) ;
- case 3:
- if(this.branch.getStatus()==2){
- this.branch.setStatus(2) ;
- }
- if(this.branch.getStatus()==1){
- this.branch.setStatus(2) ;
- }
- }
- this.branch.setNature(branch.getNature()) ;
- return this.branch;
- }
- index++;
- if ((index + 1) > branches.length) {
- branches = Arrays.copyOf(branches, index + 1);
- }
- branches[index] = branch;
- AnsjArrays.sort(branches);
- return branch;
- }
-
- public Branch(char c, int status, int nature) {
- this.c = c;
- this.status = (byte) status;
- this.nature = (byte) nature;
- }
-
- int i = 0;
-
- public WoodInterface get(char c) {
- int i = AnsjArrays.binarySearch(branches, c);
- if (i > -1) {
- return branches[i];
- }
- return null;
- }
-
- public boolean contains(char c) {
- if (AnsjArrays.binarySearch(branches, c) > -1) {
- return true;
- } else {
- return false;
- }
- }
-
- public int compareTo(char c) {
- if (this.c > c) {
- return 1;
- }else if (this.c < c) {
- return -1;
- }else
- return 0 ;
- }
-
- public boolean equals(char c) {
- if (this.c == c) {
- return true;
- } else {
- return false;
- }
- }
-
- @Override
- public int hashCode() {
-
- return c;
- }
-
- public byte getStatus() {
- return status;
- }
-
- public void setStatus(int status) {
- this.status = (byte) status;
- }
-
- public char getC() {
- return this.c;
- }
-
- public byte getNature() {
- return nature;
- }
-
- public void setNature(byte nature) {
- this.nature = nature;
- }
-
- }
- class AnsjArrays {
- private static final int INSERTIONSORT_THRESHOLD = 7;
-
-
-
-
-
-
-
-
- public static int binarySearch(WoodInterface[] branches, char c) {
- int high = branches.length - 1;
- if (branches.length < 1) {
- return high;
- }
- int low = 0;
- while (low <= high) {
- int mid = (low + high) >>> 1;
- int cmp = branches[mid].compareTo(c);
-
- if (cmp < 0)
- low = mid + 1;
- else if (cmp > 0)
- high = mid - 1;
- else
- return mid;
- }
- return -1;
- }
-
- public static void sort(WoodInterface[] a) {
- WoodInterface[] aux = (WoodInterface[])a.clone();
- mergeSort(aux, a, 0, a.length, 0);
- }
-
- public static void sort(WoodInterface[] a, int fromIndex, int toIndex) {
- rangeCheck(a.length, fromIndex, toIndex);
- WoodInterface[] aux = copyOfRange(a, fromIndex, toIndex);
- mergeSort(aux, a, fromIndex, toIndex, -fromIndex);
- }
-
- private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) {
- if (fromIndex > toIndex)
- throw new IllegalArgumentException("fromIndex(" + fromIndex
- + ") > toIndex(" + toIndex + ")");
- if (fromIndex < 0)
- throw new ArrayIndexOutOfBoundsException(fromIndex);
- if (toIndex > arrayLen)
- throw new ArrayIndexOutOfBoundsException(toIndex);
- }
-
- private static void mergeSort(WoodInterface[] src, WoodInterface[] dest, int low,
- int high, int off) {
- int length = high - low;
-
-
- if (length < INSERTIONSORT_THRESHOLD) {
- for (int i = low; i < high; i++)
- for (int j = i; j > low
- && (dest[j - 1]).compareTo(dest[j].getC()) > 0; j--)
- swap(dest, j, j - 1);
- return;
- }
-
-
- int destLow = low;
- int destHigh = high;
- low += off;
- high += off;
- int mid = (low + high) >>> 1;
- mergeSort(dest, src, low, mid, -off);
- mergeSort(dest, src, mid, high, -off);
-
-
-
- if (src[mid - 1].compareTo(src[mid].getC()) <= 0) {
- System.arraycopy(src, low, dest, destLow, length);
- return;
- }
-
-
- for (int i = destLow, p = low, q = mid; i < destHigh; i++) {
- if (q >= high || p < mid
- && src[p].compareTo(src[q].getC()) <= 0)
- dest[i] = src[p++];
- else
- dest[i] = src[q++];
- }
- }
-
-
-
-
- private static void swap(WoodInterface[] x, int a, int b) {
- WoodInterface t = x[a];
- x[a] = x[b];
- x[b] = t;
- }
-
- public static <T> T[] copyOfRange(T[] original, int from, int to) {
- return copyOfRange(original, from, to, (Class<T[]>) original.getClass());
- }
-
- public static <T, U> T[] copyOfRange(U[] original, int from, int to,
- Class<? extends T[]> newType) {
- int newLength = to - from;
- if (newLength < 0)
- throw new IllegalArgumentException(from + " > " + to);
- T[] copy = ((Object) newType == (Object) Object[].class) ? (T[]) new Object[newLength]
- : (T[]) Array
- .newInstance(newType.getComponentType(), newLength);
- System.arraycopy(original, from, copy, 0, Math.min(original.length
- - from, newLength));
- return copy;
- }
- }