基于管道过滤器的KWIC系统实现(1)

来源:互联网 发布:新星星知我心1998全集 编辑:程序博客网 时间:2024/05/17 00:17

    基于管道过滤器的KWIC系统实现。

    实现噪声单词过滤以及首单词大写功能。


package lws;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;

/*
 * To construct the framework of the system
 */
public class KWIC{
  public void execute(String file,String noisyFile){
    try{
      Pipe in_lt=new Pipe();
      Pipe lt_cs = new Pipe();
      Pipe cs_lt = new Pipe();
      Pipe lt_al=new Pipe();
      Pipe al_sf=new Pipe();
      Pipe sf_ou=new Pipe();
     
      FileInputStream in = new FileInputStream(file);
      FileInputStream noise=new FileInputStream(noisyFile);
      BufferedReader br=new BufferedReader(new InputStreamReader(noise));
      String str;
      ArrayList<String> noises=new ArrayList<String>();
      while((str=br.readLine())!=null){
       noises.add(str);
      }
     

      Input input = new Input(in, in_lt);
      LineTransformer lineTransformer1 =new LineTransformer(in_lt,lt_cs);
      CircularShifter shifter = new CircularShifter(lt_cs, cs_lt);
      LineTransformer lineTransformer2 =new LineTransformer(cs_lt,lt_al);
      Alphabetizer alpha = new Alphabetizer(lt_al, al_sf);
      ShiftFilter shiftFilter=new ShiftFilter(al_sf,sf_ou,noises);
      Output output = new Output(sf_ou);
     
      input.start(); // run it !
      lineTransformer1.start();
      shifter.start();
      lineTransformer2.start();
      alpha.start();
      shiftFilter.start();
      output.start();
    }catch(IOException exc){
      exc.printStackTrace();
    }
  }

  /*
   * there would be two parameters to start the main thread,first if the file of input
   * and  the second if the file of noisy words
   */
  public static void main(String[] args){
    if(args.length != 2){
      System.err.println("KWIC Usage: java kwic.ms.KWIC file_name noisy_file_name");
      System.exit(1);
    }

    KWIC kwic = new KWIC();
    kwic.execute(args[0],args[1]);
  }

}

 

package lws;
import java.io.IOException;
import java.io.InputStream;

public class Input extends Filter{

  private InputStream in_;

  /*
   * Read from InputStream in and write to Pipe output
   * And transform the input stream to standard form:every words departed by a single
   * space and every line ends with a '\n'
   */
  public Input(InputStream in, Pipe output){
    super(null, output);
    in_ = in;
  }

  protected void transform(){
    try{
      boolean is_new_line = false;     
      boolean is_new_word = false;
      boolean is_line_started = false;
     
      int c = in_.read();
      while(c != -1){
        switch((byte) c){
        case '\n':         
          is_new_line = true;
          break;
        case ' ':
          is_new_word = true;
          break;
        case '\t':
          is_new_word = true;
          break;
        case '\r':
          break;
        default:
          if(is_new_line){
            output_.write('\n');
            is_new_line = false;
            is_line_started = false;
          }
          if(is_new_word){
            if(is_line_started)
              output_.write(' ');
            is_new_word = false;
          }
          output_.write(c);
          is_line_started = true;
          break;
        }       
        c = in_.read();
      }

      output_.write('\n');
      output_.closeWriter();
    }catch(IOException exc){
      exc.printStackTrace();
      System.err.println("KWIC Error: Could not read the input file.");
      System.exit(1);
    }
  }

}

原创粉丝点击