Get the header content (meta tags) from a website, in Java

来源:互联网 发布:淘宝手机怎么开店认证 编辑:程序博客网 时间:2024/06/01 12:02

I just helped a friend write some Java code that reads the <meta> tags in the HTML header of a website's response:

 

=====================================

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

 

/**
 * Downloads a page and prints every {@code <META ...>} tag found in it.
 *
 * <p>The page text is upper-cased before it is scanned, so tags are matched
 * case-insensitively and are printed in upper case. Line breaks in the page
 * are dropped while reading, so a tag that spans several lines is printed on
 * one line.
 */
public class ReadWebsite {

    /** Page that is scanned when no URL is given on the command line. */
    private static final String DEFAULT_URL =
            "http://java.sun.com/j2se/1.4.2/docs/api/java/lang/String.html";

    /** Upper-case prefix that marks the start of a meta tag. */
    private static final String META_OPEN = "<META";

    /**
     * Fetches a page and prints each of its meta tags to standard output,
     * each one preceded by blank lines. I/O failures (including a malformed
     * URL) are reported on standard error instead of being thrown.
     *
     * @param args optional; {@code args[0]} is the URL to read. When absent,
     *             {@link #DEFAULT_URL} is used.
     */
    public static void main(String[] args) {
        String address = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;

        try {
            String page = readAll(new URL(address));
            for (String tag : extractMetaTags(page)) {
                // println("\n") emits a newline plus the line terminator,
                // i.e. two blank lines before every tag.
                System.out.println("\n");
                System.out.println(tag);
            }
        } catch (IOException e) {
            System.err.println("Error: " + e);
        }
    }

    /**
     * Reads the whole response body of {@code url} into one string, with the
     * line terminators removed.
     *
     * <p>The bytes are decoded as UTF-8 so the result does not depend on the
     * platform default charset. The charset declared by the server is not
     * consulted.
     *
     * @param url location to read
     * @return the response text with all lines joined together
     * @throws IOException if the connection cannot be opened or read
     */
    private static String readAll(URL url) throws IOException {
        URLConnection connection = url.openConnection();
        StringBuilder text = new StringBuilder();

        // try-with-resources closes the reader and the underlying stream
        // even when reading fails part-way through.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line);
            }
        }
        return text.toString();
    }

    /**
     * Collects every substring that starts with {@code <META} and ends at the
     * next {@code >}, after upper-casing the input.
     *
     * @param html page text to scan
     * @return the upper-cased tags in document order; empty if there are none
     */
    private static List<String> extractMetaTags(String html) {
        // Locale.ROOT keeps the case mapping independent of the user's locale.
        String upper = html.toUpperCase(Locale.ROOT);
        List<String> tags = new ArrayList<>();

        int start = upper.indexOf(META_OPEN);
        while (start != -1) {
            int end = upper.indexOf('>', start);
            if (end == -1) {
                // Unterminated tag at the end of the page: nothing more can
                // match, and continuing would search the same spot forever.
                break;
            }
            tags.add(upper.substring(start, end + 1));
            start = upper.indexOf(META_OPEN, end + 1);
        }
        return tags;
    }

}

 

 

======== output is as follows:

 

<META NAME="COLLECTION" CONTENT="API">


<META NAME="KEYWORDS" CONTENT="JAVA.LANG.STRING CLASS">


<META NAME="KEYWORDS" CONTENT="CASE_INSENSITIVE_ORDER">


<META NAME="KEYWORDS" CONTENT="LENGTH()">


<META NAME="KEYWORDS" CONTENT="CHARAT()">


<META NAME="KEYWORDS" CONTENT="GETCHARS()">


<META NAME="KEYWORDS" CONTENT="GETBYTES()">


<META NAME="KEYWORDS" CONTENT="EQUALS()">


<META NAME="KEYWORDS" CONTENT="CONTENTEQUALS()">


<META NAME="KEYWORDS" CONTENT="EQUALSIGNORECASE()">


<META NAME="KEYWORDS" CONTENT="COMPARETO()">


<META NAME="KEYWORDS" CONTENT="COMPARETOIGNORECASE()">


<META NAME="KEYWORDS" CONTENT="REGIONMATCHES()">


<META NAME="KEYWORDS" CONTENT="STARTSWITH()">


<META NAME="KEYWORDS" CONTENT="ENDSWITH()">


<META NAME="KEYWORDS" CONTENT="HASHCODE()">


<META NAME="KEYWORDS" CONTENT="INDEXOF()">


<META NAME="KEYWORDS" CONTENT="LASTINDEXOF()">


<META NAME="KEYWORDS" CONTENT="SUBSTRING()">


<META NAME="KEYWORDS" CONTENT="SUBSEQUENCE()">


<META NAME="KEYWORDS" CONTENT="CONCAT()">


<META NAME="KEYWORDS" CONTENT="REPLACE()">


<META NAME="KEYWORDS" CONTENT="MATCHES()">


<META NAME="KEYWORDS" CONTENT="REPLACEFIRST()">


<META NAME="KEYWORDS" CONTENT="REPLACEALL()">


<META NAME="KEYWORDS" CONTENT="SPLIT()">


<META NAME="KEYWORDS" CONTENT="TOLOWERCASE()">


<META NAME="KEYWORDS" CONTENT="TOUPPERCASE()">


<META NAME="KEYWORDS" CONTENT="TRIM()">


<META NAME="KEYWORDS" CONTENT="TOSTRING()">


<META NAME="KEYWORDS" CONTENT="TOCHARARRAY()">


<META NAME="KEYWORDS" CONTENT="VALUEOF()">


<META NAME="KEYWORDS" CONTENT="COPYVALUEOF()">


<META NAME="KEYWORDS" CONTENT="INTERN()">

 

原创粉丝点击