内容分析程序

来源:互联网 发布:淘宝详情图关注怎么做 编辑:程序博客网 时间:2024/05/16 09:05

 内容分析程序

 

/*
 * DataAnalysis.java
 * 内容分析程序
 *
 * Created on 2007年10月9日, 下午7:43
 *
 * 马如林 桂林电子科技大学
 * 
 
*/


package com.mrl;

import java.io.*;
import java.util.*;
import java.sql.*;
import java.util.Hashtable;

import com.xjt.nlp.word.ICTCLAS;
import com.mrl.DbConn;
import com.mrl.FileProcess;

/**
 *
 * 
@author rulinma
 
*/

public class DataAnalysis 
{
    
    
/** 哈希表 */
    
private static Hashtable KeywordProbality = new Hashtable();
   
    
/** 最大的Double值 */
    
private static double maxDouble = 1.0;
    
    
/** 最小的Double值*/
    
private static double minDouble = -1.0;
    
    
private static String normalStatus = "1";
    
private static String abnormalStatus = "3";
    
    
/** Creates a new instance of DataAnalysis */
    
public DataAnalysis() 
    
{
    }


    
/*
     * 主函数入口
     
*/

    
public static void main(String[] args)
    
{
        
// 从参数列表中获取
        String tableName = "dlog_diary";
        String primaryKey 
= "diary_id";
        String content 
= "content";
        String status 
= "status";
        String statusFlag 
= "2";
        
// 获取关键词及对应的概率 
        getKeywordProbaliy();
        maxDouble 
= Double.MAX_VALUE / maxDouble;
        minDouble 
= Double.MIN_VALUE / minDouble;
        
// 不断循环,也可以使用定时器
        while(true)
        
{
            
// 获取内容
            getContent(tableName,primaryKey,content,status,statusFlag);
        }

    }

      
    
/*
     * JavaBean对象调用入口,需要修改其中的static定义
     
*/

    
public static void DataAnalysis()
    
{     
        
// 从参数列表中获取
        String tableName = "dlog_diary";
        String primaryKey 
= "diary_id";
        String content 
= "content";
        String status 
= "status";
        String statusFlag 
= "2";
        
        getKeywordProbaliy();
        
while(true)
        
{
            
// 获取内容
            getContent(tableName,primaryKey,content,status,statusFlag);

        }


    }

    
    
private static void getContent(String tableName, String primaryKey, String content, String status, String statusFlag)
    
{
        Connection con 
= DbConn.getConn ();
        Statement stmt 
= null;
        ResultSet rs 
= null;
        
        
while(true)
        
{
            
try
            
{
                stmt 
= con.createStatement();
                String querySql 
= "SELECT " + primaryKey + "," + content+ "," + status + " FROM "+ tableName + " WHERE " + status +" = " + statusFlag ;
                rs 
= stmt.executeQuery(querySql);
                
while(rs.next ())
                
{
                    String id 
= rs.getString(primaryKey);
                    String blogContent 
= rs.getString(content);
                    
// 内容分析
                    boolean result = contentAnalysis(blogContent);
                    
// 反馈程序
                    if(result)
                    
{
                        feedBack(tableName, primaryKey, status, id, normalStatus);
                    }

                    
else
                    
{
                        feedBack(tableName, primaryKey, status, id, abnormalStatus);
                    }

                    
                }

            }

            
catch (SQLException e) 
            
{
                e.getStackTrace ();
            }
              
            
finally
            
{
                
if(rs!=null)
                
{
                    
try
                    
{
                        rs.close ();
                    }

                    
catch(Exception e)
                    
{
                        e.getStackTrace ();
                    }


                }

                
if(stmt!=null)
                
{
                    
try
                    
{
                        stmt.close ();
                    }

                    
catch(Exception e)
                    
{
                        e.getStackTrace ();
                    }
                
                }
            
            }

        }

    }

    
    
/*
     * 字符串分词
     
*/

    
private static boolean contentAnalysis(String strContent)
    
{
        ICTCLAS ictclas 
= new ICTCLAS();
        
if(!ictclas.init (0,2))
        
{
            ictclas.init (
0,2);
        }


        String strTrans
=ictclas.paragraphProcess(strContent);
        
        
// 根据字符串的先验概率计算概率
        return(filterString(strTrans));
    }

    
      
/*
     * 根据字符串的先验概率计算概率
     
*/

    
private static boolean filterString(String srcStr) 
    
{
        
/** 使用trim去掉前后多余空格防止发生意外 */
        String strTemp
=srcStr.trim();
        String tempText 
= "";
        
        
double probality = 1.0;
        
        StringTokenizer st 
= new StringTokenizer(strTemp," ");
        
int len = st.countTokens();
        
int i = 0;

        
while(i<len)
        
{
            tempText 
= st.nextToken();
            i
++;
            
            
// 获取该词对应的概率
            double keywordProbality = divStr(tempText);   
            
{
                probality 
= probality * keywordProbality;
                
// 用2个参数分别表示上溢出和下溢出值 
                
// 溢出处理
                
// 一旦遇到上溢出 表明其中含有不良信息 即可停止计算
                if(probality > maxDouble)
                
{
                    
return false;
                }

                
else  if(probality < minDouble)
                
{
                    
// 继续往下计算 
                    probality = 1.0;
                }

            }

        }

        System.out.println (probality);
        
        
if(probality >1.0)
        
{
            
return false;
        }

        
else
        
{
            
return true;
        }

    }

    
    
/*
     * 分词
     
*/

    
private static double divStr(String srcStr) 
    
{
        StringTokenizer st 
= new StringTokenizer(srcStr,"/");
        
        
double probality = 1.0;
        
        
int len = st.countTokens();

        
if(len == 2)
        
{
            String strPre 
= st.nextToken ();
            
// 获取该词的先验概率 
            probality = Double.parseDouble(KeywordProbality.get(strPre).toString());
        }

        
        
return probality;
    }

       
    
/*
     * 根据系统默认设置获取对应的词和概率存储在向量中
     
*/

     
private static void getKeywordProbaliy()
    
{
        FileProcess fileProcess 
= new FileProcess();
        
        
/** 读取数据配置文件所在目录 */
        String dir 
= fileProcess.GetCurrDir ();
        
        
/** 系统默认设置的目录文件 */
        String fileName 
= "/blogAudit/Incoming/300ArticlesKeywordsPossible.txt";
        
        
/** 完整的文件访问路径 */
        String fullFileName 
= dir + fileName;
        
        File myFile 
= new File(fullFileName);
        
        
if(!myFile.exists())
        

            System.err.println(
"Can't Find " + fullFileName);
        }

        
try 
        
{
            BufferedReader in 
= new BufferedReader(new FileReader(myFile));
            String str;
            
            
while ((str = in.readLine()) != null
            
{    
                
// |为分隔符
                int divPos = str.lastIndexOf("|");
                
try
                
{
                    
if(maxDouble < Double.parseDouble(str.substring(divPos+1,str.length())))
                    
{
                        
// 最大值
                        maxDouble = Double.parseDouble(str.substring(divPos+1,str.length()));
                    }

                    
if(minDouble > Double.parseDouble(str.substring(divPos+1,str.length())))
                    
{
                        
// 最小值
                        minDouble = Double.parseDouble(str.substring(divPos+1,str.length()));
                    }

                    
// 添加词及对应的概率
                    KeywordProbality.put(str.substring(0,divPos),str.substring(divPos+1,str.length()));
                }

                
catch(Exception e)
                
{
                    e.getStackTrace();
                }

            }

            in.close();
        }
 
        
catch (IOException e) 
        
{
            e.getStackTrace();
        }

     }

     
    
/*
     * 反馈系统
     
*/

    
private static void feedBack(String tableName, String primaryKey, String status, String id, String statsUpdateFlag)
    
{
        Connection con 
= DbConn.getConn ();
        Statement stmt 
= null;
        
try
        
{
            stmt 
= con.createStatement();
            String upSql 
= null;   

            upSql 
=  "UPDATE "+ tableName +" SET "+ status + "=" + statsUpdateFlag +" WHERE "+ primaryKey + "=" +id ;
            
/** 执行插入操作*/
            stmt.executeUpdate(upSql);
        }

        
catch (SQLException e) 
        
{
            e.getStackTrace ();
        }
      
        
finally
        
{
            
if(stmt!=null)
            
{
                
try
                
{
                    stmt.close ();
                }

                
catch(Exception e)
                
{
                    e.getStackTrace ();
                }

            }

        }

    }

}

原创粉丝点击