Java之查找html文件当中的所有标签

来源:互联网 发布:ubuntu更新 编辑:程序博客网 时间:2024/06/06 03:35

本文介绍如何从html文件中提取所有标签,并统计每个标签出现的次数:


  1. 主要还是字符串的操作
  2. 使用了Map记录每个标签名出现的次数
  3. 使用了Formatter类对输出格式进行控制,可以固定输出的长度以及设置对齐方式

import java.util.*;
import java.io.*;

/**
 * Counts the HTML tag names that occur in a file and prints them as a
 * two-column table.
 *
 * <p>The scan is purely textual and works one line at a time; it is not an
 * HTML parser. A tag name is the text between a {@code '<'} and the next
 * space, {@code '>'}, {@code '/'} or end of line. Closing tags
 * ({@code </p>}), comments and declarations ({@code <!...}) are skipped.
 * Names are case-sensitive, and text that merely looks like a tag (for
 * example {@code a<b } inside a script) is counted as well.
 */
public class ShowTags {

    /** Character encoding used to decode the input file. */
    private static final String ENCODING = "UTF-8";

    /** File scanned by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_PATH = "D:\\eclipse\\WebJava2\\vacation.htm";

    /**
     * Reads the file at {@code filePath} as UTF-8 and counts how often each
     * opening tag name appears.
     *
     * <p>Failures are reported on standard output rather than thrown: a
     * missing or non-regular file prints {@code "cant find the file!"}, any
     * other error prints {@code "read file error!"} plus the stack trace.
     *
     * @param filePath path of the HTML file to scan
     * @return a mutable map from tag name to occurrence count; empty if the
     *         file could not be found, and possibly partial if reading failed
     *         part-way through
     */
    public static Map<String, Integer> SelectTags(String filePath) {
        Map<String, Integer> map = new HashMap<String, Integer>();
        try {
            File file = new File(filePath);
            if (file.exists() && file.isFile()) {
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(new FileInputStream(file), ENCODING));
                try {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        countTagsInLine(line, map);
                    }
                } finally {
                    // Close even when readLine() fails, so the file handle
                    // is never leaked.
                    reader.close();
                }
            } else {
                System.out.println("cant find the file!");
            }
        } catch (Exception e) {
            // Deliberately broad: callers have always received a map back
            // (never an exception), including for a null path.
            System.out.println("read file error!");
            e.printStackTrace();
        }
        return map;
    }

    /** Finds every opening tag name in one line and adds it to {@code counts}. */
    private static void countTagsInLine(String line, Map<String, Integer> counts) {
        int length = line.length();
        int pos = 0;
        while (pos < length) {
            if (line.charAt(pos) != '<') {
                pos++;
                continue;
            }
            int nameStart = pos + 1;
            if (nameStart >= length) {
                break; // '<' is the last character: no name follows
            }
            char first = line.charAt(nameStart);
            if (first == '!' || first == '/' || first == '\'' || first == ' ') {
                // Comment/declaration, closing tag, or a bare '<' in text.
                pos = nameStart;
                continue;
            }

            // Walk to the end of the name. The bound is the full line length
            // so that a terminator in the very last column is seen, and
            // running off the end of the line counts as a terminator too
            // (the line break is whitespace).
            int end = nameStart;
            boolean aborted = false;
            while (end < length) {
                char c = line.charAt(end);
                if (c == '\'') {
                    aborted = true; // a quote inside the name: not a tag
                    break;
                }
                if (c == ' ' || c == '>' || c == '/') {
                    break;
                }
                end++;
            }

            if (!aborted) {
                String name = line.substring(nameStart, end);
                if (isCountableName(name)) {
                    Integer seen = counts.get(name);
                    counts.put(name, seen == null ? 1 : seen + 1);
                }
            }
            pos = end;
        }
    }

    /** Rejects the empty name and the stray one-character names that are never tags. */
    private static boolean isCountableName(String name) {
        return !(name.length() == 0
                || name.equals("\r")
                || name.equals("\\")
                || name.equals("\""));
    }

    /**
     * Prints the tag counts of a file as a left-aligned table.
     *
     * @param args optional; {@code args[0]} is the file to scan, otherwise the
     *             built-in default path is used
     */
    public static void main(String[] args) {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;
        Map<String, Integer> answerMap = SelectTags(path);
        Formatter f = new Formatter(System.out);
        // Key and value columns are 20 and 15 characters wide; '-' means
        // left-aligned.
        f.format("%-20s %-15s\n", "key", "value");
        for (Map.Entry<String, Integer> entry : answerMap.entrySet()) {
            f.format("%-20s %-15s\n", entry.getKey(), entry.getValue());
        }
        // Flush but do not close: closing the Formatter would close System.out.
        f.flush();
    }
}

这里写图片描述

0 0
原创粉丝点击