Java api 调用Sqoop2进行MySQL-->Hive的数据同步
来源:互联网 发布:pinnacle临床数据采集 编辑:程序博客网 时间:2024/06/16 08:51
1.相关jar包
2.一些需要的参数定义在message.properties中
jdbcHiveUrl=jdbc:hive2://10.1.9.91:10000 //hive地址jdbcHiveDriver=org.apache.hive.jdbc.HiveDriver // hive驱动jdbc_mysql_driver=com.mysql.jdbc.Driver //MySQL驱动hiveUser=hive //hive用户名hivePwd=123456 //hive密码hiveType=star hiveDbName=defaultmapred.reduce.tasks=1sqoopServerUrl=http://10.1.9.91:12000/sqoop/ //sqoop服务器地址 ##polling interval time init(ms)polling_interval_time = 86400000 //定时的时间间隔,参照上一篇#Project start day hour:min:second polling_start_time = 10:54:10 //定时的启动时间outputFormat=TEXT_FILE storageType=HDFSsqoopOutput = /user/outputHive/HDFSUrl =export_target_database_url=jdbc:mysql://10.1.65.13:3306/yinzhoutest //MySQL数据库地址export_target_database_username=root //MySQL用户名export_target_database_password=root //MySQL用户密码3.创建DataSynMysqlAndHiveService,HiveService,JDBCService三个service并实现,
package com.scheduler.service.impl;import java.io.IOException;import java.sql.*;import java.text.ParseException;import java.text.SimpleDateFormat;import java.util.*;import java.util.Date;import com.scheduler.service.JDBCService;import com.scheduler.util.*;import org.apache.sqoop.submission.SubmissionStatus;import com.scheduler.service.HiveService;import org.apache.commons.lang3.StringUtils;import org.apache.log4j.Logger;import org.apache.sqoop.client.SqoopClient;import org.apache.sqoop.client.SubmissionCallback;import org.apache.sqoop.model.*;import org.apache.sqoop.validation.Status;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.stereotype.Service;import com.mysql.jdbc.Statement;import com.scheduler.service.DataSynMysqlAndHive;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.LocatedFileStatus;import org.apache.hadoop.fs.Path;import org.apache.sqoop.model.MConnection;import org.apache.sqoop.model.MConnectionForms;import org.apache.sqoop.model.MJob;import org.apache.sqoop.model.MJobForms;import org.apache.sqoop.model.MSubmission;import org.apache.sqoop.submission.counter.Counter;import org.apache.sqoop.submission.counter.CounterGroup;import org.apache.sqoop.submission.counter.Counters;@Service("DataSynMysqlAndHiveImpl")public class DataSynMysqlAndHiveImpl implements DataSynMysqlAndHive {protected Logger log = Logger.getLogger(DataSynMysqlAndHiveImpl.class);private static String jdbcHiveDriver = Messages.getString("jdbcHiveDriver");private static String jdbcHiveUrl = Messages.getString("jdbcHiveUrl"); private static String hiveUser = Messages.getString("hiveUser"); private static String hivePwd = Messages.getString("hivePwd"); private static String exportDatabase = Messages.getString("export_target_database_url"); private static String exportUsername = Messages.getString("export_target_database_username"); private static String exportPassword = Messages.getString("export_target_database_password"); private static String jdbcMysqlDriver = Messages.getString("jdbc_mysql_driver"); private static String pollingStartTime = Messages.getString("polling_start_time");private static SimpleDateFormat yMd = new SimpleDateFormat("yyyy-MM-dd");private static SimpleDateFormat yMdHms = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");private static Date polling_start_time = null; //轮询开始时间 private static String sqoopServerUrl = Messages.getString("sqoopServerUrl");private SqoopClient sqoopClient;// sqoop客户端对象 @Autowiredprivate JDBCService jdbcService; // 增加JDBC服务@Autowiredprivate HiveService hfs;@Overridepublic String exportHiveData(String tableName) {String flag = "success";try { Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { flag = "error"; e.printStackTrace(); log.error("hive链接出错", e); }//获取当天时间以及前一天的时间Date nowDate = new Date();Calendar calendar = Calendar.getInstance(); calendar.setTime(nowDate); calendar.add(Calendar.DAY_OF_MONTH, -1); Date predate = calendar.getTime();SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");String predateString = dateFormat.format(predate) + " " + pollingStartTime;String nowdateString = dateFormat.format(nowDate) + " " + pollingStartTime;String sql = "select * from " + tableName + " where resource_flag = 1 and create_time <= \'" + nowdateString +"\' and create_time >\'" + predateString +"\'";log.info("sql:" + sql);System.out.println("sql:" + sql);try {Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser,hivePwd);java.sql.Statement stmt = con.createStatement();ResultSet resultSet = stmt.executeQuery(sql);if (resultSet.next()) {//如果查询hive有数据则进行更新,如果没有数据那么不更新String exportSql = generateExportSql(sql,tableName);ResultSet set = stmt.executeQuery(exportSql);System.out.println("导出sql为:"+exportSql);if (set.next()) {int result = set.getInt(1);if (result == 1) {flag = "error";}}}closeConnection(con, stmt, resultSet);} catch (SQLException e) {e.printStackTrace();flag = "error";}return flag;}/** * @param sql * @param tableName * @return */private String generateExportSql(String selectSql, String tableName) {//拼接sql,使用udf函数导出StringBuffer buffer = new StringBuffer();buffer.append("select dboutput(\'");buffer.append(exportDatabase);buffer.append("\',\'");buffer.append(exportUsername);buffer.append("\',\'");buffer.append(exportPassword);buffer.append("\',\'");//定义数据库链接Connection conn = null;//定义数据库查询结果劫ResultSet rs = null;try {//设置编码/*if (exportDatabase.contains("jdbc:mysql") && !exportDatabase.contains("characterEncoding")) {exportDatabase = exportDatabase + "?characterEncoding=UTF-8";//设置utf-8编码}*///获取数据库链接conn=getConnection(jdbcMysqlDriver, exportDatabase, exportUsername, exportPassword);//获取结果rs=conn.getMetaData().getColumns(null, null, tableName, null); //循环获取所有结果String columnNames = "";String value = "";while(rs.next()){if (!StringUtils.equals("id", rs.getString("COLUMN_NAME"))) {columnNames = columnNames + rs.getString("COLUMN_NAME") + ",";value = value + "?,";}}columnNames = columnNames.substring(0, columnNames.length()-1);value = value.substring(0, value.length()-1);String insertSql = "insert into " + tableName + "(" + columnNames +") values(" +value + ")";buffer.append(insertSql+"\',");buffer.append(columnNames);buffer.append(") from ");buffer.append("("+selectSql.replace("*", columnNames)+")");} catch (Exception e) {e.printStackTrace();}closeConnection(conn, null, rs);System.out.println("导出的sql为:"+buffer.toString());return buffer.toString();}public void closeConnection(Connection connection, java.sql.Statement pStatement, ResultSet resultSet){ try {if (resultSet != null) {resultSet.close();}if (pStatement != null) {pStatement.close();}if (connection != null) {connection.close();}} catch (Exception e) {e.printStackTrace();} }public Connection getConnection(String driver, String url, String userName,String password) {//定义链接Connection connection = null;//加载数据库驱动try {Class.forName(driver);} catch (ClassNotFoundException e) {e.printStackTrace();System.out.println("The Driver loaded error,please contact to your Software Designer!");}//得到数据库链接try {Properties props =new Properties();props.put("remarksReporting","true");props.put("user", userName);props.put("password", password);connection = DriverManager.getConnection(url, props);//connection = DriverManager.getConnection(url, userName, password);} catch (SQLException e) {e.printStackTrace();}return connection;}/** * <p> * Description:[mysql向hive中导入] * */@Overridepublic String importHiveData(String sourceTableName) {//判断有没有数据更新try {Date nowTime = yMdHms.parse(yMdHms.format(new Date()));//前一天时间String preDate = yMdHms.format(TimeHelper.dateAddDay(nowTime,-1));//Timestamp aftTimestamp = getAfterMaxTimestamp(sourceTableName,preDate,"create_time");if (null == aftTimestamp ){return "检测没有新数据";}} catch (ParseException e) {e.printStackTrace();}//定义全局变量监控抽取过程是否出现错误boolean hasError = false;//1.初始化sqoop客户端并且得到sqoop连接MConnection con =initSqoop();//如果得到的连接为空,打印日志,结束该任务if (con == null) {System.out.print("连接为空");return "error";}//2.创建sqoop任务,任务类型为导入任务MJob newjob = sqoopClient.newJob(con.getPersistenceId(),org.apache.sqoop.model.MJob.Type.IMPORT);CallBack callback = new CallBack(sourceTableName);//获取该表的表信息List<TableVO> tableVOs = jdbcService.getTables(exportDatabase, exportUsername, exportPassword, null, null, sourceTableName, null);//获取该表的列信息List<ColumnVO> columnVOs = jdbcService.getColumns(exportDatabase, exportUsername, exportPassword, sourceTableName);boolean isFirst = true;String primaryKey = jdbcService.getPrimaryKey(exportDatabase,exportUsername,exportPassword,null,null,sourceTableName);String hdfsFilePath= "";hdfsFilePath=updateIncrementSqoopJob(newjob,sourceTableName,columnVOs);//启用线程监控sqoop采集时长Thread thread = monitorRuntime(sqoopClient,3*60*60,newjob);//定义任务开始时间变量long startTime = System.currentTimeMillis();//开始sqoop任务采集,并返回sqoop任务采集状态MSubmission submission = startSqoopTask(0,newjob,thread,callback);//将sqoop导入时间字段添加到column中columnVOs=addSqoopTimeColumn(columnVOs);if (submission.getStatus().compareTo(SubmissionStatus.SUCCEEDED) == 0) {// 任务执行成功,则把数据写入到hive中hasError=createOrcHiveAfterSqoop(sourceTableName,columnVOs, hdfsFilePath, startTime, startTime, false);}if (submission.getStatus().compareTo(SubmissionStatus.FAILED) == 0|| submission.getExceptionInfo() != null) {// 任务执行出错,打印出错信息,并记录到任务日志中System.out.println(submission.getExceptionInfo());//出现错误,记录日志,删除hdfs文件addLogCaseSqoopFail(submission,newjob,hdfsFilePath,thread);//标记发生错误hasError = true;return "error";}//afterFinishTask(hasError);return "success";}/** * <p> * Description:[初始化sqoop客户端,得到sqoop链接] * </p> * @return MConnection sqoop连接 */public MConnection initSqoop(){//初始化客户端this.sqoopClient = new SqoopClient(sqoopServerUrl);//获取该数据源的sqoop链接idLong conId = createSqoopConnection("zheda",exportDatabase,exportUsername,exportPassword,jdbcMysqlDriver);//根据sqoop xid 获得链接MConnection con =sqoopClient.getConnection(conId);//将该链接返回return con;}public long createSqoopConnection(String resourceName, String jdbcUrl, String name, String passwd, String driver) {SqoopClient sqoopClient = new SqoopClient(Messages.getString("sqoopServerUrl"));MConnection newCon = sqoopClient.newConnection(1);MConnectionForms conForms = newCon.getConnectorPart();MConnectionForms frameworkForms = newCon.getFrameworkPart();newCon.setName(resourceName);conForms.getStringInput("connection.connectionString").setValue(jdbcUrl);// 数据库连接url字符串conForms.getStringInput("connection.jdbcDriver").setValue(driver);// 数据库驱动conForms.getStringInput("connection.username").setValue(name);// 数据库用户名conForms.getStringInput("connection.password").setValue(passwd);// 数据库密码frameworkForms.getIntegerInput("security.maxConnections").setValue(0);// sqoop的最大连接数try {Status status = sqoopClient.createConnection(newCon);if (status.canProceed()) {return newCon.getPersistenceId();} else {log.info("Check for status and forms error ");System.out.println("Check for status and forms error ");return -1;}} catch (Exception e) {log.error("创建连接出错!:"+e.getMessage());System.out.println(e.getMessage());return -1;}}/** * <p> * Description:[初始化sqoop客户端,得到sqoop链接] * </p> * */// sqoop任务执行回调内部类class CallBack implements SubmissionCallback {private String tableName;public String getTableName() {return tableName;}public void setTableName(String tableName) {this.tableName = tableName;}public CallBack() {super();}public CallBack(String tableName){super();this.tableName= tableName;}@Overridepublic void submitted(MSubmission mSubmission) {}@Overridepublic void updated(MSubmission mSubmission) {}// sqoop任务完成回调函数@Overridepublic void finished(MSubmission arg0) {}}/** * <p> * Description:[启用线程监控sqoop任务执行时长,如果超过执行时长,停止执行该任务] * </p> * * @param SqoopClient sqoop客户端 * @param int 任务执行时长 * @param final long sqoop任务Id * @return Thread 当前的监控线程 */public Thread monitorRuntime(SqoopClient sqc,int taskTime,final MJob sJob){//获取监听时间,如果没有指定监听时间,默认为24小时final int job_timeout_time = taskTime != 0 ? taskTime :20;// 启用一个线程,用于监听sqoop执行任务的时间,如果时间超过最大执行时间,则停止掉该任务Thread thread = new Thread(new Runnable() {@Overridepublic void run() {try {//监听任务执行时长,如果超过最大时间,停掉sqoop任务Thread.sleep(job_timeout_time * 60 * 60 * 1000);sqoopClient.stopSubmission(sJob.getPersistenceId());} catch (InterruptedException e) {log.error("sqoop全量任务发生异常!",e);}}});thread.start();//将该线程返回return thread;}/** * <p> * Description:[任务采集后,根据原表中的字段信息以及hdfs文件地址创建hive表] * </p> * * @param tableName 表名称 * @param columnVOs 表字段 * @param hdfsPath hdfs文件地址 * @return boolean 是否创建成功 */public boolean createHiveTable(String tableName,List<ColumnVO> columnVOs,String hdfsPath){boolean hasError = false;//组装sqlStringBuffer createSql = new StringBuffer("create table " + tableName + "(");for (int i = 0; i < columnVOs.size(); i++) {if (i == 0) {createSql.append("`" + columnVOs.get(i).getColumnName()+ "` string");} else {createSql.append(",`"+ columnVOs.get(i).getColumnName()+ "` string");}}createSql.append(") ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LOCATION ");createSql.append(" '" + hdfsPath + "'");log.info("createSql:" + createSql);String sql = createSql.toString().trim();//创建表try {boolean success = hfs.createHiveTable(tableName, sql);//如果返回的结果有错误,则标记hive创建出现错误if(!success){hasError = true;}} catch (Exception e) {e.printStackTrace();hasError =true;}//返回结果return hasError;}/** * <p> * Description:[hive表创建失败后,记录日志并且删除对应的hdfs文件] * </p> * * @param tableName 表名称 * @param hdfsPath hdfs文件地址 * @param jobId sqoopJobid */public void addLogCaseCreatehiveTableError(String tableName,String hdfsPath,long jobId){//记录日志,//addTaskLog("create hiveTable "+tableName+" failed!", jobId);//删除hdfs文件deleteHdfsHiveTable(hdfsPath,tableName);}/** * <p> * Description:[启动sqoop采集任务] * </p> * @param loopTime 任务执行次数标识,用于判断创建或者更新任务 * @param newjob sqoopJob实体 * @param Thread 监控任务执行时长的线程 * @param callback sqoop回调类 * @return MSubmission Sqoop提交结果 */public MSubmission startSqoopTask(int loopTime,MJob newjob,Thread thread,CallBack callback){MSubmission submission= null;//第一次执行,则创建新的任务,否则,更新任务if (loopTime == 0) {sqoopClient.createJob(newjob);} else {sqoopClient.updateJob(newjob);}//执行sqoop任务try {submission = sqoopClient.startSubmission(newjob.getPersistenceId(), callback, 100);} catch (InterruptedException e1) {// 发生异常停止掉if (thread.isAlive()) {thread.interrupt();}log.error("sqoop提交全量任务出错!:",e1);}//返回结果return submission;}/** * <p> * Description:[sqoop任务失败时,添加日志,删除hdfs文件等] * </p> * @param MSubmission Sqoop提交结果 * @param MJob sqoopJob实体 * @param String hdfs文件地址 * @param Thread 监控任务执行时长的线程 * @return void */public void addLogCaseSqoopFail(MSubmission submission,MJob sJob,String hdfsUrl,Thread thread){//后台打印出错误信息System.out.println(submission.getExceptionInfo());// 删除hdfs文件deleteHdfsFiles(hdfsUrl);//如果监控线程还在继续,则停止线程if (thread.isAlive()) {thread.interrupt();// 发生异常停止掉}}/** * <p> * Description:[根据传入的表名和列信息,组装成创建表的sql] * </p> * @param tableName 表名称 * @param columnVOs 表字段 * @return String 生成的sql */public String getCreateTableSQL(String tableName,List<ColumnVO> columnVOs,boolean isText){//组装sqlStringBuffer createSql = new StringBuffer("create table " + tableName + "(");for (int i = 0; i < columnVOs.size(); i++) {if (i == 0) {createSql.append("`" + columnVOs.get(i).getColumnName()+ "` string");} else {createSql.append(",`"+ columnVOs.get(i).getColumnName()+ "` string");}}createSql.append(")");if (isText) {createSql.append(" ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' ");}log.info("createSql:" + createSql);String sql = createSql.toString().trim();//返回结果return sql;}/** * <p> * Description:[根据传入列对象,组装列信息] * </p> * * @param columnVOs 表字段 * @return String 生成的sql */public String getColumns(List<ColumnVO> columnVOs){//组装sqlStringBuffer columns = new StringBuffer("");for (int i = 0; i < columnVOs.size(); i++) {if (i == 0) {columns.append("`" + columnVOs.get(i).getColumnName()+ "` string");} else {columns.append(",`"+ columnVOs.get(i).getColumnName()+ "` string");}}log.info("createSql:" + columns);String column = columns.toString().trim();//返回结果return column;}/** * <p> * Description:[增量sqoop导入完成之后,创建hiveorc表,插入orc数据,实现增量,保存源数据信息] * </p> * * @param tableVOs 源表信息 * @param columnVOs 源表字段信息 * @param hdfsFilePath sqoop导入成功后hdfs文件地址 * @param jobId sqoopJobid 用于保存任务日志信息 * @param startTime 任务开始时间用于保存该任务总共花费时间 * @return boolean 整个过程是否发生错误,true 存在错误, false 正常执行,不存在错误 */public boolean createOrcHiveAfterSqoop(String table, List<ColumnVO> columnVOs,String hdfsFilePath,long jobId,long startTime,boolean isFirst){boolean hasError = false;// 定义表名String orcTableName = table;String sourceTableName= table;String primaryKey = jdbcService.getPrimaryKey(exportDatabase,exportUsername,exportPassword,null,null,sourceTableName);try {if(primaryKey == null || primaryKey.trim().equals("")) {primaryKey = columnVOs.get(0).getColumnName();}//textfileTable在这里表示 增量数据临时表的表名,先将增量数据放在临时表,再将临时表的数据导入目标表String textfileTable = orcTableName+"_temp";//获取sqlString sql = getCreateTableSQL(textfileTable,columnVOs,true);// 创建hive表,并把增量的数据导入到hive表中hfs.createHiveTempTable(textfileTable, sql,hdfsFilePath);// 非第一次导入,先将hive中相关的数据删除,再插入相关数据long incrementInsertTime = System.currentTimeMillis();hfs.deleteIncrementDataExistInOrcTable(textfileTable, orcTableName, primaryKey,jdbcHiveUrl);hfs.insertIntoHiveOrcTable(textfileTable, orcTableName, jdbcHiveUrl);long incrementInsertTimeEnd = System.currentTimeMillis();System.out.println("orc增量新增和更新数据到orc表所用时间:" + (incrementInsertTimeEnd - incrementInsertTime));log.info("orc增量新增和更新数据到orc表所用时间:" + (incrementInsertTimeEnd - incrementInsertTime));} catch (Exception e) {hasError = true;log.error("全量任务创建hive表出错!",e);}return hasError;}/** * <p> * Description:[在获取的源表的字段列表中加入sqoop的loadtime字段,字段名称为“load_bigdata_time”] * </p> * @param List<ColumnVO> 源表字段信息 * @return List<ColumnVO> */public List<ColumnVO> addSqoopTimeColumn(List<ColumnVO> cVos){ColumnVO cVo= new ColumnVO();cVo.setColumnName("load_bigdata_time");cVo.setComment("Sqoop导入时间");cVo.setType("datetime");cVos.add(cVo);return cVos;}/** * 在sqoop导入时出现问题,删除已经生成的hdfs文件,hive在创建表时出现问题,删除已经创建的表和hdfs文件 * * @param HDFSPath * @param HiveTableName */private void deleteHdfsHiveTable(String HDFSPath, String HiveTableName) {String HDFSUrl = Messages.getString("HDFSUrl");String HDFSFilePath = HDFSUrl + HDFSPath;System.setProperty("HADOOP_USER_NAME", Messages.getString("hiveUser"));try {try {hfs.deleteFdfsByHiveTable(HiveTableName);hfs.deleteHiveTrueTable(HiveTableName);} catch (ClassNotFoundException e1) {e1.printStackTrace();} // 如果表存在,删除表// 删除hdfs文件Path p = new Path(HDFSFilePath);Configuration conf = new Configuration();try {FileSystem fs = p.getFileSystem(conf);boolean isHad = fs.exists(p);if (isHad) {fs.delete(p, true);}// boolean b = fs.createNewFile(p);fs.close();} catch (IOException e) {e.printStackTrace();}} catch (SQLException e) {e.printStackTrace();}}public void deleteHdfsFiles(String hdfsPath) {String HDFSFilePath = jdbcHiveUrl + hdfsPath;System.setProperty("HADOOP_USER_NAME", hiveUser);try {// 删除hdfs文件Path p = new Path(HDFSFilePath);Configuration conf = new Configuration();FileSystem fs = p.getFileSystem(conf);boolean isHad = fs.exists(p);if (isHad) {fs.delete(p, true);}fs.close();} catch (Exception e) {e.printStackTrace();}}//判断从上一次更新之后数据的最大时间public Timestamp getAfterMaxTimestamp( String tableName, String preTimestamp, String columnName) {Timestamp timestamp = null;Connection connection = JdbcConnection.getConnection(jdbcMysqlDriver, exportDatabase,exportUsername, exportPassword);PreparedStatement pStatement = null;ResultSet resultSet = null;String sql = "select max(date_format(" + columnName + ",'%Y-%m-%d %H:%i:%S')) from "+ "(select * from " + tableName + " where date_format(" + columnName + ",'%Y-%m-%d %H:%i:%S') > '" + preTimestamp + "') as increment";/*如果是Oracle {sql = "select max(to_char(" + columnName + ",'yyyy-MM-dd hh24:mi:ss')) from ("+ "select * from " + tableName + " where to_char(" + columnName + ",'yyyy-MM-dd hh24:mi:ss') > '" + preTimestamp + "')";} 如果是Sybase {sql = "select * from " + tableName;} 如果是sql server {sql = "select max(Convert(varchar," + columnName + ",120)) from ("+ "select * from " + tableName + " where Convert(varchar," + columnName + ",120) > '" + preTimestamp + "') as increment";}*/try {pStatement = connection.prepareStatement(sql);resultSet = pStatement.executeQuery();if (resultSet.next()) {//timestamp = changeToTimestamp(resultSet.getString(1));if(resultSet.getString(1) == null) {return timestamp;}timestamp =Timestamp.valueOf(resultSet.getString(1));}} catch (SQLException e) {e.printStackTrace();} finally {JdbcConnection.closeConnection(connection, pStatement, resultSet);}return timestamp;}/** * 1111更新increment sqoop Job配置 */private String updateIncrementSqoopJob(MJob newjob, String tableName, List<ColumnVO> columns) {MJobForms connectorForm = newjob.getConnectorPart();MJobForms frameworkForm = newjob.getFrameworkPart();newjob.setName("ImportJob_zheda");//获取源表的主键String primaryKey = jdbcService.getPrimaryKey(exportDatabase,exportUsername,exportPassword,null,null,tableName);//如果主键不为空,设定“partitionColumn”参数为主键,并且设置任务执行的map数为10if(primaryKey != null && !primaryKey.trim().equals("")) {frameworkForm.getIntegerInput("throttling.extractors").setValue(10);// 指定map的个数connectorForm.getStringInput("table.partitionColumn").setValue(primaryKey);//如果主键为空,选取不为时间类型的字段为“partitionColumn”参数,并指定map数为1}else {//选取不为时间类型的字段for(int i=0;i<columns.size();i++){if (!columns.get(i).getType().toUpperCase().contains("TIME")&&!columns.get(i).getType().toUpperCase().contains("DATE")) {primaryKey = columns.get(i).getColumnName();break;}}//设定“partitionColumn”参数connectorForm.getStringInput("table.partitionColumn").setValue(primaryKey);// 指定map的个数frameworkForm.getIntegerInput("throttling.extractors").setValue(1);}// 控制增量导入//获取当天时间以及前一天的时间Date nowDate = new Date();Calendar calendar = Calendar.getInstance();calendar.setTime(nowDate);calendar.add(Calendar.DAY_OF_MONTH, -1);Date predate = calendar.getTime();SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");String predateString = dateFormat.format(predate) + " " + pollingStartTime;String nowdateString = dateFormat.format(nowDate) + " " + pollingStartTime;String sql = "select * FROM " + tableName + " where ";String charStr = " Convert(varchar,"+"create_time"+",120) ";charStr = "date_format(" +"create_time" + ",'%Y-%m-%d %H:%i:%S') ";sql += charStr + " > '" + predateString + "' and " + charStr + " <= '" + nowdateString+ "' and ${CONDITIONS}";System.out.println("SQL ::"+sql);connectorForm.getStringInput("table.sql").setValue(sql);String hdfdFilePath = Messages.getString("sqoopOutput") + new Date().getTime() + tableName;frameworkForm.getEnumInput("output.storageType").setValue(Messages.getString("storageType"));frameworkForm.getEnumInput("output.outputFormat").setValue(Messages.getString("outputFormat"));frameworkForm.getStringInput("output.outputDirectory").setValue(hdfdFilePath);frameworkForm.getIntegerInput("throttling.extractors").setValue(1);// 指定map的个数return hdfdFilePath;}}
package com.scheduler.service.impl;import java.io.IOException;import org.apache.hadoop.fs.FileSystem;import java.sql.Connection;import java.sql.DriverManager;import java.sql.PreparedStatement;import java.sql.ResultSet;import java.sql.ResultSetMetaData;import java.sql.SQLException;import java.sql.Statement;import java.sql.Timestamp;import java.text.SimpleDateFormat;import java.util.ArrayList;import java.util.Date;import java.util.HashMap;import java.util.LinkedList;import java.util.List;import java.util.Map;import com.scheduler.util.Constant;import com.scheduler.util.Messages;import com.scheduler.util.ColumnVO;import org.json.JSONArray;import org.json.JSONObject;import org.springframework.stereotype.Service;import com.scheduler.service.HiveService;import org.apache.sqoop.client.SubmissionCallback;import org.apache.commons.lang.StringUtils;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.LocatedFileStatus;import org.apache.hadoop.fs.Path;import org.apache.hadoop.fs.RemoteIterator;import org.apache.log4j.Logger;import javax.annotation.Resource;/** * <p> * Title: manageplatform_[Hive] * </p> * <p> * Description: [HiveService实现层] * </p> * * @author GLJ * @author (latest modification by $Author$) * @version $Revision$ 2015-03-18 * @since 20130601 */@Service("hiveServiceImpl")public class HiveServiceImpl implements HiveService { protected Logger log = Logger.getLogger(DataSynMysqlAndHiveImpl.class); private static String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); private static String jdbcHiveUrl = Messages.getString("jdbcHiveUrl"); private static String hiveUser = Messages.getString("hiveUser"); private static String hivePwd = Messages.getString("hivePwd"); private static String exportDatabase = Messages.getString("export_target_database_url"); private static String exportUsername = Messages.getString("export_target_database_username"); private static String exportPassword = Messages.getString("export_target_database_password"); private static String jdbcMysqlDriver = Messages.getString("jdbc_mysql_driver"); public HiveServiceImpl() { } @Override public boolean existTable(String table) throws SQLException { boolean flag = false; try { Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { e.printStackTrace(); log.error("hive链接出错", e); } Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); java.sql.Statement stmt = con.createStatement(); String sql = "show tables '" + table + "'"; log.info("sql:" + sql); ResultSet set = stmt.executeQuery(sql); while (set.next()) { String reTableName = set.getString(1); if ((table.toLowerCase()).equals(reTableName.toLowerCase())) { flag = true; break; } } return flag; } @Override public boolean createTableAsSelect(String targetTableName, String select) throws SQLException { String create = "CREATE TABLE " + targetTableName; String option = " row format delimited fields terminated by '\001' "; // you // can // change // it String as = " AS " + select; // here you can decide which column, table // to select, join table or more // comprehension clause String sql = create + option + as; log.info("创建数据表sql:" + sql); System.out.println("Running: " + sql); try { Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive链接出错", e); e.printStackTrace(); } Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); java.sql.Statement stmt = con.createStatement(); stmt.execute(sql); stmt.close(); con.close(); return true; } //11111111111111 @Override public void deleteHiveTrueTable(String tableName) throws SQLException { String deleteSql = "drop table if exists " + tableName; System.out.println("Running: " + deleteSql); log.info("删除数据表sql:" + deleteSql); try { Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive链接出错", e); e.printStackTrace(); } Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); java.sql.Statement stmt = con.createStatement(); stmt.execute(deleteSql); stmt.close(); con.close(); } @Override public List<Map<String, String>> getHiveColunmsByTableName(String hiveurl, String userName, String password, String tableName) { List<Map<String, String>> colsAndType = new ArrayList<Map<String, String>>(); try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive链接出错", e); e.printStackTrace(); } Connection con; try { con = DriverManager.getConnection(hiveurl, userName, password); Statement stmt = con.createStatement(); String sql = "desc " + tableName; log.info("获取表字段sql" + sql); ResultSet resultSet = stmt.executeQuery(sql); while (resultSet.next()) { Map<String, String> map = new HashMap<String, String>(); String colunm = resultSet.getString(1); String type = resultSet.getString(2); map.put("column", colunm); map.put("type", type); colsAndType.add(map); } stmt.close(); con.close(); } catch (SQLException e) { e.printStackTrace(); log.error("sql执行出错", e); } return colsAndType; } @Override public List<String> getColumnValues(String tableName, String colName) { String jdbcHiveUrl = Messages.getString("jdbcHiveUrl"); String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); String sql = "select distinct " + colName + " from " + tableName; try { final String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); Connection con; con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); final Statement stmt = con.createStatement(); log.info("sql:" + sql); final ResultSet datSet = stmt.executeQuery(sql); List<String> values = new ArrayList<String>(); while (datSet.next()) { values.add(datSet.getString(1)); } return values; } catch (final ClassNotFoundException e) { log.error("hive链接出错", e); e.printStackTrace(); return null; } catch (SQLException e) { log.error("sql执行出错", e); e.printStackTrace(); return null; } }/* * 得到所有表 *//*private ArrayList<String> getTables() throws SQLException {try {Class.forName(jdbcHiveDriver);} catch (ClassNotFoundException e) {e.printStackTrace();log.error("hive链接出错",e);}Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser,hivePwd);java.sql.Statement stmt = con.createStatement();if (stmt == null)return null;String sql = "show tables";ArrayList<String> result = new ArrayList<String>();log.info("sql:"+sql);ResultSet res = stmt.executeQuery(sql);while (res.next()) {result.add(res.getString(1));}stmt.close();con.close();return result;}*/ @Override public List<String> getTablesColName(String url, long resourceId, String userName, String password, String goOnTableName) { List<String> tableList = new LinkedList<String>(); if (url.contains("jdbc:sybase:Tds")) { tableList = this.getColNameOfSybase(url, resourceId, userName, password, goOnTableName); return tableList; } try { String jdbcMysqlDriver = Messages.getString("jdbc_mysql_driver"); if (url.contains("jdbc:oracle")) { jdbcMysqlDriver = Messages.getString("jdbc_oracle_driver"); } else if (url.contains("jdbc:sqlserver")) { jdbcMysqlDriver = Messages.getString("jdbc_sqlserver_driver"); } Class.forName(jdbcMysqlDriver); } catch (ClassNotFoundException e) { log.error("hive链接异常", e); // TODO Auto-generated catch block e.printStackTrace(); } Connection con; try { con = DriverManager.getConnection(url, userName, password); Statement stmt = con.createStatement(); ResultSet tableSet = null; PreparedStatement pStatement = null; if (url.contains("jdbc:oracle")) { String sql1 = Messages.getString("oracle_show_tables"); log.info("sql:" + sql1); pStatement = con.prepareStatement(sql1); tableSet = pStatement.executeQuery(); } else if (url.contains("jdbc:sqlserver")) { String sql2 = Messages.getString("sqlserver_show_tables"); log.info("sql:" + sql2); pStatement = con.prepareStatement(sql2); tableSet = pStatement.executeQuery(); } else { String[] type = {"TABLE"}; tableSet = con.getMetaData().getTables("", "", "", type); } Boolean id = false; while (tableSet.next()) { String tableName = null; if (url.contains("jdbc:oracle")) { tableName = tableSet.getString(1); } else if (url.contains("jdbc:sqlserver")) { tableName = tableSet.getString(1); } else { tableName = tableSet.getString("TABLE_NAME"); } if (goOnTableName == null || goOnTableName.equals("") || goOnTableName.equals(" ")) { id = true; } else { if (tableName.equals(goOnTableName)) id = true; } if (id) { tableList.add(tableName); } } stmt.close(); con.close(); } catch (SQLException e) { log.error("SQL执行异常", e); e.printStackTrace(); } return tableList; } private List<String> getColNameOfSybase(String url, long resourceId, String userName, String password, String goOnTableName) { List<String> tableList = new LinkedList<String>(); String jdbcMysqlDriver = Messages.getString("jdbc_sybase_driver"); try { Class.forName(jdbcMysqlDriver); String sql = Messages.getString("sybase_show_tables"); Connection con = DriverManager.getConnection(url, userName, password); Statement stmt = con.createStatement( ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY); log.info("sql:" + sql); PreparedStatement pStatement = con.prepareStatement(sql); ResultSet tableSet = pStatement.executeQuery(); Boolean id = false; while (tableSet.next()) { String tableName = tableSet.getString("TABLE_NAME"); if (goOnTableName == null || goOnTableName.equals("") || goOnTableName.equals(" ")) { id = true; } else { if (tableName.equals(goOnTableName)) id = true; } if (id) { tableList.add(tableName); } } stmt.close(); con.close(); } catch (ClassNotFoundException e) { e.printStackTrace(); log.error("hive链接出错", e); } catch (SQLException e) { e.printStackTrace(); log.error("SQL执行异常", e); } return tableList; } @Override public List<String> getViewsColName(String url, long resourceId, String userName, String password, String schemaName, String goOnViewName) { List<String> viewList = new LinkedList<String>(); if (url.contains("jdbc:sybase:Tds")) { viewList = getSybaseView(url, resourceId, userName, password, goOnViewName); return viewList; } try { String jdbcMysqlDriver = Messages.getString("jdbc_mysql_driver"); if (url.contains("jdbc:oracle")) { jdbcMysqlDriver = Messages.getString("jdbc_oracle_driver"); } else if (url.contains("jdbc:sqlserver")) { jdbcMysqlDriver = Messages.getString("jdbc_sqlserver_driver"); } Class.forName(jdbcMysqlDriver); } catch (ClassNotFoundException e) { log.error("jdbc链接异常", e); e.printStackTrace(); } Connection con; try { con = DriverManager.getConnection(url, userName, password); Statement stmt = con.createStatement(); ResultSet viewSet = null; PreparedStatement pStatement = null; String vn = "name"; if (url.contains("jdbc:oracle")) { String sql1 = Messages.getString("oracle_show_views"); log.info("sql:" + sql1); pStatement = con.prepareStatement(sql1); viewSet = pStatement.executeQuery(); vn = "VIEW_NAME"; } else if (url.contains("jdbc:sqlserver")) { String sql2 = Messages.getString("sqlserver_show_views"); log.info("sql:" + sql2); pStatement = con.prepareStatement(sql2); viewSet = pStatement.executeQuery(); } else { String sql3 = Messages.getString("mysql_show_views") + "'" + schemaName + "'"; log.info("sql:" + sql3); pStatement = con.prepareStatement(sql3); viewSet = pStatement.executeQuery(); vn = "table_name"; } Boolean id = false; while (viewSet.next()) { String tableName = viewSet.getString(vn); if (goOnViewName == null || goOnViewName.equals("") || goOnViewName.equals(" ")) { id = true; } else { if (tableName.equals(goOnViewName)) id = true; } if (id) { viewList.add(tableName); } } stmt.close(); con.close(); } catch (SQLException e) { log.error("SQL执行异常", e); e.printStackTrace(); } return viewList; } private List<String> getSybaseView(String url, long resourceId, String userName, String password, String goOnTableName) { List<String> viewList = new LinkedList<String>(); String jdbcMysqlDriver = Messages.getString("jdbc_sybase_driver"); try { Class.forName(jdbcMysqlDriver); String sql = Messages.getString("sybase_show_views") + "'sysquerymetrics'"; Connection con = DriverManager.getConnection(url, userName, password); Statement stmt = con.createStatement( ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY); log.info("sql:" + sql); PreparedStatement pStatement = con.prepareStatement(sql); ResultSet tableSet = pStatement.executeQuery(); Boolean id = false; while (tableSet.next()) { String tableName = tableSet.getString("name"); if (goOnTableName == null || goOnTableName.equals("") || goOnTableName.equals(" ")) { id = true; } else { if (tableName.equals(goOnTableName)) id = true; } if (id) { viewList.add(tableName); } } stmt.close(); con.close(); } catch (ClassNotFoundException e) { log.error("hive连接异常", e); e.printStackTrace(); } catch (SQLException e) { log.error("SQL执行异常", e); e.printStackTrace(); } return viewList; }//111111111111111111111 @Override public boolean createHiveTable(String tableName,String sql) throws SQLException { boolean success= true; String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); String hiveUrl=Messages.getString("jdbcHiveUrl"); System.setProperty("HADOOP_USER_NAME", hiveUser); try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive连接异常", e); e.printStackTrace(); success = false; } Connection con; con = DriverManager.getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); try { deleteFdfsByHiveTable(tableName); } catch (ClassNotFoundException e) { e.printStackTrace(); log.error("hive连接异常", e); } // 同时删除对应的hdfs文件,因为是建外表 long startTime = System.currentTimeMillis(); String dropIfExistsTable = "drop table if exists " + tableName; long endTime = System.currentTimeMillis(); System.out.println("删除已存在的表所花时间(针对全量导入):" + (endTime - startTime)); stmt.execute(dropIfExistsTable); log.info("createSql:" + sql); stmt.execute(sql); stmt.close(); con.close(); return success; } /** * 根据表名删除该hive表对应的hdfs文件,主要针对hive中的外表 *11111111111111 * @param tableName * @return * @throws ClassNotFoundException * @throws SQLException */ public boolean deleteFdfsByHiveTable(String tableName) throws ClassNotFoundException, SQLException { boolean b = false; String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); String jdbcHiveUrl = Messages.getString("jdbcHiveUrl"); String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); String sqoopOutput = Messages.getString("sqoopOutput"); String HDFSpath = Messages.getString("HDFSpath"); System.setProperty("HADOOP_USER_NAME", Messages.getString("hiveUser")); String rootPath = Messages.getString("HDFSUrl"); Class.forName(jdbcHiveDriver); String path = null; Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); java.sql.Statement stmt = con.createStatement(); // 判断该表是否存在 String sqlHad = "show tables '" + tableName + "'"; ResultSet had = stmt.executeQuery(sqlHad); if (!had.next()) { return true; } String sql = "describe formatted " + tableName; log.info("sql:" + sql); ResultSet set = stmt.executeQuery(sql); while (set.next()) { String location = set.getString(1); if (location != null && "Location:".equals(location.replace(" ", ""))) path = set.getString(2); } set.close(); stmt.close(); con.close(); if (path != null) { String[] paths = null; if (path.contains(sqoopOutput)) { paths = path.split(sqoopOutput); } else if (path.contains(HDFSpath)) { paths = path.split(HDFSpath); } if (paths != null && paths.length > 0) { String dfs = paths[0]; path = path.replace(dfs, rootPath); Path p = new Path(path); Configuration conf = new Configuration(); try { FileSystem fs = p.getFileSystem(conf); boolean isHad = fs.exists(p); if (isHad) { b = fs.delete(p, true); } else { b = true; } // boolean b = fs.createNewFile(p); fs.close(); } catch (IOException e) { log.error("HDFS文件读取异常", e); e.printStackTrace(); } } } return b; } @Override public boolean isExistHiveTable(String tableName) throws SQLException { String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); String hiveUrl = Messages.getString("jdbcHiveUrl"); System.setProperty("HADOOP_USER_NAME", hiveUser); boolean exist = false; if (tableName == null || tableName.trim().equals("")) return false; try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive链接异常", e); e.printStackTrace(); } Connection con; con = DriverManager.getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); String showTablesql = "show tables '" + tableName + "'"; log.info("showTablesql:" + showTablesql); ResultSet tableSet = stmt.executeQuery(showTablesql); if (tableSet.next()) { exist = true; } return exist; } /** * 创建Hive textfiled表 */ public String createHiveTempTable(String tableName,String sql, String HDFSPAth) throws SQLException { String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); String hiveUrl = Messages.getString("jdbcHiveUrl"); System.setProperty("HADOOP_USER_NAME", hiveUser); try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive链接异常", e); e.printStackTrace(); } Connection con; con = DriverManager.getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); String dropIfExistsTable = "drop table if exists " + tableName; log.info("dropIfExistsTable:" + dropIfExistsTable); stmt.execute(dropIfExistsTable); log.info("createSql:" + sql); stmt.execute(sql); String loadData = "LOAD DATA INPATH '" + HDFSPAth + "' INTO TABLE " + tableName; log.info("loadData:" + loadData); stmt.execute(loadData); stmt.close(); con.close(); return tableName; } /** * 创建hive表 add by yangqi 2015/10/10 */ @Override public String createHiveORCTable(String tableName,String primaryKey, String sql) throws SQLException { String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); String hiveUrl = Messages.getString("jdbcHiveUrl"); System.setProperty("HADOOP_USER_NAME", hiveUser); try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive连接异常", e); e.printStackTrace(); } Connection con; con = DriverManager.getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); try { deleteFdfsByHiveTable(tableName); } catch (ClassNotFoundException e) { log.error("hive连接异常", e); e.printStackTrace(); } // 同时删除对应的hdfs文件,因为是建外表 String dropIfExistsTable = "drop table if exists " + tableName; log.info("dropIfExistsTable:" + dropIfExistsTable); stmt.execute(dropIfExistsTable); stmt.execute("set ngmr.partition.automerge = true"); String createSql= sql+" CLUSTERED BY (" + primaryKey + ") INTO " + "100" + " BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' STORED AS ORC TBLPROPERTIES " + "('transactional'='true')"; System.out.println(createSql); log.info("createSql:" + createSql); stmt.execute(createSql.toString().trim()); stmt.close(); con.close(); // return tableName; } /** * 创建hiveorc表 add by yangqi 2015/10/10 *///1111111111111111 // 将数据从hive的textFile表导入到orc表中 @Override public void insertIntoHiveOrcTable(String textfileTableName, String orcTableName, String hiveUrl) throws SQLException { String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); System.setProperty("HADOOP_USER_NAME", hiveUser); try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive连接异常", e); e.printStackTrace(); } Connection con; con = DriverManager.getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); //获取text表的大小,根据这个大小来判断task的量 Map<String,String> map = getCountAndSize(textfileTableName, Messages.getString("jdbcHiveUrl")); stmt.execute("set ngmr.partition.automerge = true"); long count = Long.parseLong(map.get("count")); if(count>=50000000){ stmt.execute("set mapred.reduce.tasks=100"); }else if(10000000<=count&&count<=50000000){ stmt.execute("set mapred.reduce.tasks=20"); }else{ stmt.execute("set mapred.reduce.tasks=10"); } String insertSql = "insert into table " + orcTableName + " select * from " + textfileTableName + " where resource_flag = 0 distribute by rand()"; log.info("insertSql:" + insertSql); stmt.execute(insertSql); stmt.close(); con.close(); } /** * 根据表名统计数据表的记录数和文件大小 * * @author ZYY * @since 2015/1/14 */ @Override public Map<String, String> getCountAndSize(String tableName, String hiveUrl) throws SQLException { Map<String, String> map = new HashMap<String, String>(); //返回结果map String[] pathAndSize = new String[2]; //存储数据大小,地址数组变量 String count = ""; //数据表记录量变量/* * 获取用户名,密码,得到jdbchive链接 * */ String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); System.setProperty("HADOOP_USER_NAME", Messages.getString("hiveUser")); String rootPath = Messages.getString("HDFSUrl"); try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive链接异常", e); e.printStackTrace(); } Connection con = DriverManager .getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); //定义获取数据表记录总量的sql String countSql = "select count(*) from " + tableName; log.info("获取数据表记录总量的sql" + countSql); try { ResultSet rs = stmt.executeQuery(countSql); if (rs.next()) { count = rs.getString(1); } } catch (Exception e) { log.error("SQL执行异常", e); e.printStackTrace(); } //定义获取hive中数据大小和地址sql String sizesql = "describe formatted " + tableName; ResultSet set = stmt.executeQuery(sizesql); while (set.next()) { String location = set.getString(1); if (location != null && "Location:".equals(location.replace(" ", ""))) pathAndSize[0] = set.getString(2); String totalSize = set.getString(2); if (totalSize != null && "totalSize".equals(totalSize.replace(" ", ""))) pathAndSize[1] = set.getString(3); } // 由于hive创建的是外表,对path和siz进行处理 // 将path中的节点信息改为port if (pathAndSize[0] != null && !pathAndSize[0].contains(rootPath)) { String path = pathAndSize[0]; String[] paths = path.split("://"); if (paths.length > 1) { String dfs = paths[1]; String[] filPaths = dfs.split("/"); if (filPaths.length > 0) { String f = filPaths[0]; path = dfs.replace(f, rootPath); pathAndSize[0] = path; } } } // hive外表不能获取size的处理 if (pathAndSize[1] == null || pathAndSize[1].equals("") || "0".equals(pathAndSize[1].trim())) { if (pathAndSize[0] != null) { String path = pathAndSize[0]; Path p = new Path(path); long total = 0; Configuration conf = new Configuration(); try { FileSystem fs = p.getFileSystem(conf); boolean isHad = fs.exists(p); if (isHad) { RemoteIterator<LocatedFileStatus> fd = fs.listFiles(p, true);// 获取文件夹下所有文件 while (fd.hasNext()) { LocatedFileStatus lf = fd.next();// 获取文件 System.out.println(lf.getLen()); total = total + lf.getLen();// 文件大小 } }// 将单位由b转换为kb total =total/1024; pathAndSize[1] = total + ""; fs.close(); } catch (IOException e) { log.error("Hive文件读取出错", e); e.printStackTrace(); } } } //关闭结果集,事务和数据库链接 set.close(); stmt.close(); con.close(); //将结果存入到结果map map.put("count", count); map.put("size", pathAndSize[1]); return map; } /** * 增11111111量导入的数据,在hive中全部删除 */ public void deleteIncrementDataExistInOrcTable(String textfileTable, String orcTableName, String primaryKey, String hiveUrl) throws SQLException { String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); System.setProperty("HADOOP_USER_NAME", hiveUser); try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive连接异常", e); e.printStackTrace(); } Connection con; con = DriverManager.getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); String deleteSql = "delete from " + orcTableName + " where " + primaryKey + " in (select " + primaryKey + " from " + textfileTable + ")"; log.info("deleteSql:" + deleteSql); stmt.execute(deleteSql); stmt.close(); con.close(); } /** * merge临时表和orc add by yangqi 2015/10/14 */ @Override public void mergeIntoHiveOrcTable(Map<String, String[]> map, String hiveUrl, String primaryKey) throws SQLException { String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); System.setProperty("HADOOP_USER_NAME", hiveUser); try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive连接异常", e); e.printStackTrace(); } Connection con; con = DriverManager.getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); String resourceId = map.get("resourceId")[0]; String tableName = map.get("tableName")[0]; String orcTableName = resourceId + "_orc_" + tableName; String tempOrcTable = resourceId + "_" + tableName; StringBuffer mergeSql = new StringBuffer("MERGE INTO " + orcTableName + " a USING " + tempOrcTable + " b ON (a." + primaryKey + " = b." + primaryKey + ") WHEN MATCHED THEN UPDATE SET "); String[] cols = map.get(tableName); if (cols != null && cols.length > 0) { for (int i = 0; i < cols.length; i++) { if (0 == i) { mergeSql.append(cols[i].split(" ")[0] + " = b." + cols[i].split(" ")[0]); } else { mergeSql.append(", " + cols[i].split(" ")[0] + " = b." + cols[i].split(" ")[0]); } } } mergeSql.append(" WHEN NOT MATCHED THEN INSERT ("); if (cols != null && cols.length > 0) { for (int i = 0; i < cols.length; i++) { if (0 == i) { mergeSql.append(cols[i].split(" ")[0]); } else { mergeSql.append(", " + cols[i].split(" ")[0]); } } } mergeSql.append(") VALUES("); if (cols != null && cols.length > 0) { for (int i = 0; i < cols.length; i++) { if (0 == i) { mergeSql.append("b." + cols[i].split(" ")[0]); } else { mergeSql.append(", " + "b." + cols[i].split(" ")[0]); } } } mergeSql.append(");"); log.info("mergeSql" + mergeSql); stmt.execute(mergeSql.toString().trim()); stmt.close(); con.close(); } /** * 创建orc临时表 yangqi 2015/10/23 */ @Override public String createTempHiveORCTable(Map<String, String[]> map, String primaryKey, String hiveUrl) throws SQLException { String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); System.setProperty("HADOOP_USER_NAME", hiveUser); if (map == null || map.get("tableName") == null || map.get("tableName").length == 0) return null; try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive链接异常", e); e.printStackTrace(); } Connection con; String resourceId = map.get("resourceId")[0]; String tableName = map.get("tableName")[0]; con = DriverManager.getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); String[] cols = map.get(tableName); String table = resourceId + "_temp_orc_" + tableName;// 标识为orc表 try { deleteFdfsByHiveTable(table); } catch (ClassNotFoundException e) { log.error("hive链接异常", e); // TODO Auto-generated catch block e.printStackTrace(); } // 同时删除对应的hdfs文件,因为是建外表 String dropIfExistsTable = "drop table if exists " + table; stmt.execute(dropIfExistsTable); StringBuffer createSql = new StringBuffer("create external table " + table + "("); if (cols != null && cols.length > 0) { for (int i = 0; i < cols.length; i++) { if (i == 0) { createSql.append("`" + cols[i].replace(" ", "` ")); } else { createSql.append("," + "`" + cols[i].replace(" ", "` ")); } } } createSql.append( ") CLUSTERED BY (" + primaryKey + ") INTO " + "10" + " BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' STORED AS ORC TBLPROPERTIES " + "('transactional'='true')" ); log.info("createSql" + createSql); stmt.execute(createSql.toString().trim()); stmt.close(); con.close(); return table; } /** * 将hive临时表数据导入到hive orc表中 */ @Override public void insertIntoTempOrcTable(String textfileTableName, String tempOrcTable, String hiveUrl) throws SQLException { String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); System.setProperty("HADOOP_USER_NAME", hiveUser); try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive链接异常", e); e.printStackTrace(); } Connection con; con = DriverManager.getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); String insertSql = "insert overwrite table " + tempOrcTable + " select * from " + textfileTableName; log.info("insertSql" + insertSql); stmt.execute(insertSql); stmt.close(); con.close(); } public String createOrUpdateHiveTable(Map<String, String[]> map, String pCol, String hiveUrl, String HDFSPAth) throws SQLException { String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); System.setProperty("HADOOP_USER_NAME", hiveUser); if (map == null || map.get("tableName") == null || map.get("tableName").length == 0) return null; try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive链接异常", e); e.printStackTrace(); } Connection con; String resourceId = map.get("resourceId")[0]; String tableName = map.get("tableName")[0]; con = DriverManager.getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); String[] cols = map.get(tableName); String table = resourceId + "_" + tableName; // try { // deleteFdfsByHiveTable(table); // } catch (ClassNotFoundException e) { // // TODO Auto-generated catch block // e.printStackTrace(); // } // 同时删除对应的hdfs文件,因为是建外表 // String dropIfExistsTable = "drop table if exists " + table; // stmt.execute(dropIfExistsTable); // 创建分区表 StringBuffer createSql = new StringBuffer("CREATE TABLE IF NOT EXISTS " + table + "("); if (cols != null && cols.length > 0) for (int i = 0; i < cols.length; i++) createSql.append(cols[i] + ","); createSql.append(") PARTITIONED BY (p_column String) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' "); createSql = new StringBuffer(createSql.toString().replace(",)", ")")); log.info("hive建表语句:" + createSql); stmt.execute(createSql.toString().trim()); // 修改表创建分区 String alterSql = "ALTER TABLE " + table + " ADD IF NOT EXISTS PARTITION (p_column='" + pCol + "')"; // + "LOCATION '" + HDFSPAth + "'"; log.info("hive修改分区语句:" + alterSql); stmt.execute(alterSql); // load数据 String loadSql = "LOAD DATA INPATH '" + HDFSPAth + "' OVERWRITE INTO TABLE " + table + " PARTITION(p_column='" + pCol + "')"; log.info("hive分区导入数据:" + loadSql); stmt.execute(loadSql); stmt.close(); con.close(); return table; } @Override public void deleteHiveTruePartition(String tableName, String partitionStr) throws SQLException { // TODO Auto-generated method stub String deleteSql = "ALTER TABLE " + tableName + " DROP IF EXISTS PARTITION (" + partitionStr + ")"; System.out.println("Running: " + deleteSql); try { Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { e.printStackTrace(); log.error("hive连接异常", e); } log.info("deleteSql" + deleteSql); Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); java.sql.Statement stmt = con.createStatement(); stmt.execute(deleteSql); stmt.close(); con.close(); } @Override public String createHivePartitionTable(String tableName,List<ColumnVO> columnVOs, String HDFSPAth) throws SQLException { System.out.println("in to createHivePartitionTable"); String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); String hiveUrl = Messages.getString("jdbcHiveUrl"); System.setProperty("HADOOP_USER_NAME", hiveUser); try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive连接异常", e); e.printStackTrace(); } Connection con; con = DriverManager.getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); StringBuffer createSql = new StringBuffer("create table IF NOT EXISTS " + tableName + "_tmp ("); StringBuffer columnSql = new StringBuffer(); for (int i = 0; i < columnVOs.size()-1; i++) { createSql.append(columnVOs.get(i).getColumnName() + " string,"); columnSql.append(columnVOs.get(i).getColumnName()+","); } createSql.append("p_column String,"); createSql.append(columnVOs.get(columnVOs.size()-1).getColumnName() + " string"); columnSql.append(columnVOs.get(columnVOs.size()-1).getColumnName()+","); createSql.append(") ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' LOCATION "); createSql.append(" '" + HDFSPAth + "'"); log.info("createSql:" + createSql); System.out.println(createSql); stmt.execute(createSql.toString().trim()); String createPartitionSql = createSql.toString().replace("_tmp", "") .replace(",p_column String", "") .replace(") ROW", ") partitioned by (p_hive String) ROW") .split("LOCATION ")[0]; System.out.println(createPartitionSql); log.info("sql:" + createPartitionSql); stmt.execute(createPartitionSql); stmt.execute("set hive.exec.dynamic.partition=true"); stmt.execute("set hive.exec.dynamic.partition.mode=nonstrict"); String insertPartitionSql = "insert overwrite table " + tableName + " partition(p_hive) select " + columnSql.toString() + "substr(p_column,1,length( p_column )-1) p_hive FROM " + tableName + "_tmp"; System.out.println(insertPartitionSql); log.info("sql:" + insertPartitionSql); stmt.execute(insertPartitionSql); String dropIfExistsTable = "drop table if exists " + tableName + "_tmp"; log.info("sql:" + dropIfExistsTable); stmt.execute(dropIfExistsTable); stmt.close(); con.close(); // return tableName; } @Override public List<String> getTablesColName(String tableName) { List<String> result = null; if (!StringUtils.isBlank(tableName)) { tableName = tableName.trim(); boolean tableExist = false;// 标示表是否存在 try { Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("Hive链接异常", e); e.printStackTrace(); } try { tableExist = existTable(tableName); } catch (SQLException e1) { log.error("SQL执行异常", e1); log.error(e1.getMessage()); } if (tableExist) { Connection con = null; Statement stmt = null; try { con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); stmt = con.createStatement(); ResultSet resultSet = null; log.info("sql:" + "select * from " + tableName + " limit 1"); resultSet = stmt.executeQuery("select * from " + tableName + " limit 1"); result = new ArrayList<>(); // 获取列名 ResultSetMetaData metaData = resultSet.getMetaData(); for (int i = 0; i < metaData.getColumnCount(); i++) { // resultSet数据下标从1开始 String columnName = metaData.getColumnName(i + 1); result.add(columnName); } } catch (SQLException e) { log.error("SQL执行异常", e); log.error(e.getMessage()); } finally {// 释放资源 try { if (null != stmt) stmt.close(); if (null != con) con.close(); } catch (SQLException e) { log.error("hive链接关闭异常", e); e.printStackTrace(); } } } } return result; } @Override public Map<String, String[]> getTablesCol(String url, long resourceId, String userName, String password, String goOnTableName, String tableName) { Map<String, String[]> map = new HashMap<String, String[]>(); try { String jdbcMysqlDriver = Messages.getString("jdbc_mysql_driver"); if (url.contains("jdbc:oracle")) { jdbcMysqlDriver = Messages.getString("jdbc_oracle_driver"); } else if (url.contains("jdbc:sqlserver")) { jdbcMysqlDriver = Messages.getString("jdbc_sqlserver_driver"); } else if (url.contains("jdbc:sybase:Tds")) { jdbcMysqlDriver = Messages.getString("jdbc_sybase_driver"); } Class.forName(jdbcMysqlDriver); } catch (ClassNotFoundException e) { log.error("jdbc链接异常", e); e.printStackTrace(); } Connection con; try { con = DriverManager.getConnection(url, userName, password); Statement stmt = con.createStatement(); Boolean id = false; String sql = null; String sqltableComments = "";//查找表对应的comments字段的SQL语句 String sqlColumnInfo = "";//查找表中所有字段的信息 if (url.contains("jdbc:oracle")) { sql = "select * from " + tableName + " where rownum<=1"; sqltableComments = "select comments from user_tab_comments WHERE table_name = '"+tableName+"'"; sqlColumnInfo = "select COLUMN_NAME,DATA_TYPE from user_tab_columns where table_name = '"+tableName+"'"; } else if (url.contains("jdbc:sqlserver")) { sql = "select top 1 * from " + tableName; sqltableComments = "select * from TABLES where TABLE_SCHEMA='my_db' and table_name='"+tableName+"'"; sqlColumnInfo = "select * from INFORMATION_SCHEMA.columns where table_name = '"+tableName+"'"; } else if (url.contains("jdbc:sybase:Tds")) { sql = "select top 1 * from " + tableName; } else { sql = "select * from " + tableName + " limit 1"; sqltableComments = "SHOW TABLE STATUS LIKE \'" + tableName + "\'"; sqlColumnInfo = "show full fields from " + tableName; } log.info("sql" + sql); String[] tableRemarkInfo = new String[1]; ResultSet colsSet = stmt.executeQuery(sql); System.out.println(sql); ResultSetMetaData data = colsSet.getMetaData(); int count = data.getColumnCount(); String[] resourceIds = {resourceId + ""}; String[] cols = new String[count]; String[] colsNameAndType = new String[count];//存储字段名和字段类型 added by XH 2016-2-16 10:15:58 String[] colsRemarks = new String[count];//存储字段备注 String[] parColumn = {""}; colsSet.close(); //查完表信息先关 if (!sqltableComments.isEmpty() && !sqlColumnInfo.isEmpty()) { ResultSet tableRemarkSet = stmt.executeQuery(sqltableComments); while (tableRemarkSet.next()) { if (url.contains("jdbc:mysql")) { tableRemarkInfo[0] = tableRemarkSet.getString("Comment"); } else if(url.contains("jdbc:oracle")){ tableRemarkInfo[0] = tableRemarkSet.getString("comments"); }else{ tableRemarkInfo[0] = tableRemarkSet.getString(1); } break; } tableRemarkSet.close(); ResultSet colSet = stmt.executeQuery(sqlColumnInfo); int i = 0; while (colSet.next()) { String ColumnName = ""; String ColumnType = ""; String ColumnRemark = ""; if(url.contains("jdbc:oracle")){ ColumnName = colSet.getString("COLUMN_NAME"); ColumnType = colSet.getString("DATA_TYPE"); String sqlcolumnComment = "select comments from user_col_comments where table_name='"+tableName+"' and COLUMN_NAME = '"+ColumnName+"'"; ResultSet columnCommentSet = stmt.executeQuery(sqlcolumnComment); while(columnCommentSet.next()){ ColumnRemark = columnCommentSet.getString("comments"); break; } columnCommentSet.close(); } cols[i] = ColumnName + " " + "String"; colsNameAndType[i] = ColumnName + " " + ColumnType;//设置字段名和字段类型 colsRemarks[i++] = ColumnRemark; } colSet.close(); } else { for (int i = 1; i <= count; i++) { String cloName = data.getColumnName(i); //字段名 commtens added by XH 2016-2-3 10:44:19 String cloType = data.getColumnTypeName(i);// 字段类型 comments added by XH 2016-2-3 10:44:34 cols[i - 1] = cloName + " " + "String"; colsNameAndType[i - 1] = cloName + " " + cloType;//设置字段名和字段类型 if (parColumn[0].equals("")) { if (!cloType.equals("DATE")) { parColumn[0] = cloName; } } } } if (goOnTableName == null || goOnTableName.equals("") || goOnTableName.equals(" ")) { id = true; } else { if (tableName.equals(goOnTableName)) id = true; } if (id) { //导入hive表新增一个导入平台时间字段 String colsTime = "load_bigdata_time" + " " + "String";//创建hive表字段 String colsNameAndTypeTime = "load_bigdata_time" + " " + "datetime";//保存到元数据字段表字段 if(!isHaveStr(cols, "load_bigdata_time")) {//导入的表中不存在新增字段 // 字段数组中增加新增字段元素 List<String> listCol = new ArrayList<String>(); List<String> listColAndType = new ArrayList<String>(); List<String> listColsRemark = new ArrayList<String>(); for (int j = 0; j < cols.length; j++) { listCol.add(cols[j]); listColAndType.add(colsNameAndType[j]); listColsRemark.add(colsRemarks[j]); } listCol.add(colsTime); listColAndType.add(colsNameAndTypeTime); listColsRemark.add("导入平台时间"); // 返回String型的数组 cols = listCol.toArray(new String[0]);//创建hive表用 colsNameAndType = listColAndType.toArray(new String[0]);//保存元数据字段表用 colsRemarks = listColsRemark.toArray(new String[0]);//保存元数据字段表用 } map.put(tableName, cols); String[] talbelNames = {tableName}; map.put("tableName", talbelNames); map.put("resourceId", resourceIds); map.put("partitionColumn", parColumn); map.put("colsNameAndType", colsNameAndType); map.put("tableRemark", tableRemarkInfo); map.put("colsRemark", colsRemarks); } //tmt.close(); //ctmt.close(); stmt.close(); con.close(); } catch (SQLException e) { log.error("SQL执行异常", e); // TODO Auto-generated catch block e.printStackTrace(); } finally { } return map; } /** * 此方法有两个参数,第一个是要查找的字符串数组,第二个是要查找的字符或字符串 * @param strs * @param s * @return true包含,false不包含 */ public static boolean isHaveStr(String[] strs,String s){ for(int i=0;i<strs.length;i++){ if(strs[i].indexOf(s)!=-1){//循环查找字符串数组中的每个字符串中是否包含所有查找的内容 return true;//查找到了就返回真,不在继续查询 } } return false;//没找到返回false } @Override public boolean grantTableSelectToUser(String tableName, String username) { boolean flag = false; try { String sql = "grant select on table " + tableName + " to user " + username; log.info("数据库授权语句" + sql); Class.forName(jdbcHiveDriver); Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); Statement statement = con.createStatement(); statement.execute(sql); statement.close(); con.close(); System.out.println(sql); flag = true; } catch (Exception e) { log.error("hive链接出错", e); e.printStackTrace(); } return flag; } @Override public boolean revokeTableSelectFromUser(String tableName, String username) { boolean flag = false; try { String sql = "revoke select on table " + tableName + " from user " + username; log.info("sql" + sql); Class.forName(jdbcHiveDriver); Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); Statement statement = con.createStatement(); statement.execute(sql); statement.close(); con.close(); System.out.println(sql); flag = true; } catch (Exception e) { log.error("hive链接出错", e); e.printStackTrace(); } return flag; } @Override public boolean importMongoDBToHive(List<String> list, String tableName, String mongoUrl) { Connection con = null; //定义链接并初始化 Statement statement = null; //定义事务并初始化 //得到hive链接,若失败,抛出异常并且返回 try { Class.forName(jdbcHiveDriver); con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); statement = con.createStatement(); } catch (Exception e) { log.error("hive链接出错", e); e.printStackTrace(); return false; } String dropIfExistsTable = "drop table if exists " + tableName + "_temp"; log.info("dropIfExistsTable" + dropIfExistsTable); try { statement.execute(dropIfExistsTable); } catch (SQLException e1) { log.error("SQL异常", e1); e1.printStackTrace(); return false; } //根据传入的值,得到创建hive与mongodb关联sql语句 StringBuffer createSql = new StringBuffer(); createSql.append("create external table " + tableName + "_temp ("); for (int i = 0; i < list.size(); i++) { createSql.append(list.get(i) + " String"); if (i != list.size() - 1) { createSql.append(","); } } createSql.append(") stored by 'com.mongodb.hadoop.hive.MongoStorageHandler' with serdeproperties('mongo.columns.mapping'='{"); for (int i = 0; i < list.size(); i++) { createSql.append("\"" + list.get(i) + "\" : \"" + list.get(i) + "\""); if (i != list.size() - 1) { createSql.append(","); } } createSql.append("}') tblproperties('mongo.uri'='" + mongoUrl + "') "); log.info("createSql" + createSql); System.out.println(createSql); //执行sql语句并且返回结果,若执行失败,抛出异常并且返回 try { statement.execute(createSql.toString().trim()); } catch (Exception e) { e.printStackTrace(); log.error("SQL执行异常", e); return false; } String dropIfExistsHiveTable = "drop table if exists " + tableName; log.info("dropIfExistsHiveTableSql" + dropIfExistsHiveTable); try { statement.execute(dropIfExistsHiveTable); } catch (SQLException e1) { log.error("SQL执行异常", e1); e1.printStackTrace(); return false; } //创建hive表 String createHiveSql = "create table " + tableName + " as select * from " + tableName + "_temp"; System.out.println(createHiveSql); log.info("createHiveSql" + createHiveSql); //执行sql语句并且返回结果,若执行失败,抛出异常并且返回 try { statement.execute(createHiveSql); } catch (Exception e) { log.error("SQL执行异常", e); e.printStackTrace(); return false; } //向创建好的hive表中插入数据 String insertHiveSql = "insert overwrite table " + tableName + " select * from " + tableName + "_temp"; log.info("insertHiveSql" + insertHiveSql); System.out.println(insertHiveSql); //执行sql语句并且返回结果,若执行失败,抛出异常并且返回 try { statement.execute(insertHiveSql); } catch (Exception e) { log.error("SQL执行异常", e); e.printStackTrace(); return false; } String dropIfExistsTempTable = "drop table if exists " + tableName + "_temp"; log.info("dropIfExistsTempTable" + dropIfExistsTempTable); try { statement.execute(dropIfExistsTempTable); } catch (SQLException e1) { log.error("SQL执行异常", e1); e1.printStackTrace(); return false; } try { statement.close(); con.close(); } catch (SQLException e) { e.printStackTrace(); log.error("hive JDBC链接关闭异常", e); return false; } return true; } @Override public boolean createNewTable(String sql) throws SQLException { log.info("创建数据表sql:" + sql); System.out.println("Running: " + sql); try { Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive链接出错", e); e.printStackTrace(); } Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); java.sql.Statement stmt = con.createStatement(); String testSql = "SET transaction.type = inceptor"; stmt.execute(testSql); stmt.execute(sql); stmt.close(); con.close(); return true; } @Override public boolean insertDateToTabel(String sql) throws SQLException { log.info("创建数据表sql:" + sql); System.out.println("Running: " + sql); try { Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { log.error("hive链接出错", e); e.printStackTrace(); } Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); java.sql.Statement stmt = con.createStatement(); String testSql = "SET transaction.type = inceptor"; stmt.execute(testSql); stmt.execute(sql); stmt.close(); con.close(); return true; } @Override public ArrayList<String[]> searchBySelcetAll(String selectSql) throws SQLException { ArrayList<String[]> datas = new ArrayList<String[]>(); try { final String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (final ClassNotFoundException e) { log.error("hive链接出错", e); e.printStackTrace(); } Connection con; con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); final Statement stmt = con.createStatement(); ResultSet datSet = null; try { datSet = stmt.executeQuery(selectSql); } catch (Exception e) { e.printStackTrace(); log.error("sql执行出错", e); if (e instanceof SQLException) { throw new SQLException(e.getCause()); } } final ResultSetMetaData col = datSet.getMetaData(); final int count = col.getColumnCount(); final String[] cols = new String[count]; for (int i = 1; i <= count; i++) { final String cloName = col.getColumnName(i); cols[i - 1] = cloName; } datas.add(cols); while (datSet.next()) { final String[] colDatas = new String[count]; for (int j = 1; j <= count; j++) { colDatas[j - 1] = datSet.getString(j); } datas.add(colDatas); } stmt.close(); con.close(); return datas; } @Override public boolean judgeUserHadSelectAuthorToTable(String tableName, String userName, String password) { //如果表名或者用户名为空,直接返回false(没有权限) if (tableName == null || userName == null) { return false; } boolean had = true;//默认是有权限 try { final String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (final ClassNotFoundException e) { log.error("hive链接出错", e); e.printStackTrace(); } Connection con = null; Statement stmt = null; try { con = DriverManager.getConnection(jdbcHiveUrl, userName, password); stmt = con.createStatement(); String selectSql = "select * from " + tableName + " limit 1"; if(tableName.toLowerCase().indexOf("select")>-1){ if(tableName.toLowerCase().indexOf("limit")>-1){ selectSql = tableName; }else{ selectSql = tableName + " limit 1"; } } selectSql = selectSql.replaceAll(" +"," "); log.info(selectSql); stmt.executeQuery(selectSql);//只要查询不报错就是有权限 } catch (SQLException e1) { e1.printStackTrace(); had = false; } try { stmt.close(); con.close(); } catch (SQLException e) { e.printStackTrace(); } return had; } @Override public List<Map<String, Object>> queryBySql(String sql) { //创建集合列表用以保存所有查询到的记录 List<Map<String, Object>> list = new LinkedList<>(); ResultSet resultSet = null; Statement statement = null; Connection con = null; try { final String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); statement = con.createStatement(); resultSet = statement.executeQuery(sql); //ResultSetMetaData 是结果集元数据,可获取关于 ResultSet 对象中列的类型和属性信息的对象 例如:结果集中共包括多少列,每列的名称和类型等信息 ResultSetMetaData rsmd = resultSet.getMetaData(); //获取结果集中的列数 int columncount = rsmd.getColumnCount(); //while条件成立表明结果集中存在数据 while (resultSet.next()) { //创建一个HashMap用于存储一条数据 HashMap<String, Object> onerow = new HashMap<>(); //循环获取结果集中的列名及列名所对应的值,每次循环都得到一个对象,形如:{TEST_NAME=aaa, TEST_NO=2, TEST_PWD=aaa} for (int i = 0; i < columncount; i++) { //获取指定列的名称,注意orcle中列名的大小写 String columnName = rsmd.getColumnName(i + 1); onerow.put(columnName, resultSet.getObject(i + 1)); } //将获取到的对象onewrow={TEST_NAME=aaa, TEST_NO=2, TEST_PWD=aaa}放到集合列表中 list.add(onerow); } } catch (SQLException | ClassNotFoundException e) { e.printStackTrace(); return null; } finally { try { if (null != resultSet) resultSet.close(); if (null != statement) statement.close(); if (null != con) con.close(); } catch (SQLException e) { e.printStackTrace(); } } return list; } @Override public void dropUserFunction(String name) throws Exception { Class.forName(jdbcHiveDriver); Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser, hivePwd); String sql = "drop temporary function "+name+";"; Statement statement = con.createStatement(); statement.execute(sql); statement.close(); con.close(); } @Override public String createHiveTableForText(String tableName, String columnAndTypes, String localPath, String tableSeperator) throws SQLException{ String hiveUser = Messages.getString("hiveUser"); String hivePwd = Messages.getString("hivePwd"); String hiveUrl = Messages.getString("jdbcHiveUrl"); System.setProperty("HADOOP_USER_NAME", hiveUser); if (tableName == null || tableName.trim().equals("") || columnAndTypes == null || columnAndTypes.trim().equals("")) return null; try { String jdbcHiveDriver = Messages.getString("jdbcHiveDriver"); Class.forName(jdbcHiveDriver); } catch (ClassNotFoundException e) { e.printStackTrace(); log.error("hive链接异常", e); } Connection con; con = DriverManager.getConnection(hiveUrl, hiveUser, hivePwd); Statement stmt = con.createStatement(); String table = tableName; String showTablesql = "show tables '" + table + "'"; ResultSet tableSet = stmt.executeQuery(showTablesql); if (tableSet.next()) { return "exist"; } StringBuffer createSql = new StringBuffer("create external table " + table + "("); createSql.append(columnAndTypes); createSql.append(") ROW FORMAT DELIMITED FIELDS TERMINATED BY '" + tableSeperator + "' STORED AS TEXTFILE"); log.info("createSql:" + createSql); stmt.execute(createSql.toString().trim()); String loadSql = "load data local inpath '" + localPath + "' into table " + table; log.info("loadSql:" + loadSql); stmt.execute(loadSql); stmt.close(); con.close(); return table; }}
package com.scheduler.service.impl;import com.scheduler.service.JDBCService;import com.scheduler.util.ColumnVO;import com.scheduler.util.Messages;import com.scheduler.util.TableVO;import org.springframework.stereotype.Service;import javax.annotation.Resource;import java.sql.*;import java.util.ArrayList;import java.util.List;import java.util.Properties;import java.util.Properties;import javax.annotation.Resource;import org.springframework.stereotype.Service;@Service("JDBCServiceImpl")public class JDBCServiceImpl implements JDBCService { @Override public List<TableVO> getTables(String JDBCurl, String userId, String password, String catalog, String schemaPattern, String tableName, String[] type) { //定义结果list List<TableVO> result= new ArrayList<TableVO>(); //定义数据库链接 Connection conn = null; //定义数据库查询结果劫 ResultSet rs = null; try { //获取数据库链接 conn=getConnection(getDriver(JDBCurl), JDBCurl, userId, password); //获取结果 rs=conn.getMetaData().getTables(catalog, schemaPattern, tableName, type); //循环获取所有结果 while(rs.next()){ TableVO table= new TableVO(); table.setTableName(rs.getString("TABLE_NAME")); table.setComment(rs.getString("REMARKS")); table.setType(rs.getString("TABLE_TYPE")); result.add(table); } } catch (Exception e) { e.printStackTrace(); } //关闭所有链接 closeConnection(conn, null, rs); //返回结果 return result; } @Override public List<ColumnVO> getColumns(String JDBCurl, String userId, String password, String tableName) { //定义结果list List<ColumnVO> result= new ArrayList<ColumnVO>(); //定义数据库链接 Connection conn = null; //定义数据库查询结果劫 ResultSet rs = null; try { //获取数据库链接 if(JDBCurl.contains("jdbc:mysql") && !JDBCurl.contains("characterEncoding")) { JDBCurl = JDBCurl + "?characterEncoding=UTF-8";//设置编码 } conn=getConnection(getDriver(JDBCurl), JDBCurl, userId, password); //获取结果 rs=conn.getMetaData().getColumns(null, null, tableName, null); //循环获取所有结果 String columnNames = ","; while(rs.next()){ if(!columnNames.contains(","+ rs.getString("COLUMN_NAME")+",")){ columnNames = columnNames + rs.getString("COLUMN_NAME") + ","; ColumnVO cVo= new ColumnVO(); cVo.setColumnName(rs.getString("COLUMN_NAME")); cVo.setComment(rs.getString("REMARKS")); cVo.setType(rs.getString("TYPE_NAME")); result.add(cVo); } } } catch (Exception e) { e.printStackTrace(); } //关闭所有链接 closeConnection(conn, null, rs); //返回结果 return result; } @Override public String getPrimaryKey(String JDBCurl, String userId, String password, String catalog, String schemaPattern, String tableName) { // 定义结果字符串 String primaryKey = ""; // 定义数据库链接 Connection conn = null; // 定义数据库查询结果劫 ResultSet rs = null; try { // 获取数据库链接 conn = getConnection(getDriver(JDBCurl), JDBCurl, userId, password); // 获取结果 rs = conn.getMetaData().getPrimaryKeys(null, null, tableName); while (rs.next()) { primaryKey = rs.getString(4); } } catch (Exception e) { e.printStackTrace(); } // 关闭所有链接 closeConnection(conn, null, rs); return primaryKey; } @Override public Long getTableNum(String JDBCUrl, String userName, String password, String tableName) { String driver = getDriver(JDBCUrl); long num = 0; try { Connection conn = getConnection(driver, JDBCUrl, userName, password); String sql = "select count(0) from " + tableName; Statement stat = conn.createStatement(); ResultSet rs = stat.executeQuery(sql); if (rs.next()) { num = Long.parseLong(rs.getString(1)); } }catch (Exception e){ e.printStackTrace(); } return num; } /** * <p> * Description: [根据传入的url获取数据库Driver] * </p> * Created by [ZYY] [2017-4-26] Modified by [修改人] [修改时间] * @param JDBCurl 数据库链接串 * @return String 数据库Driver */ public String getDriver(String jdbcUrl){ String driver=""; if(jdbcUrl.contains("jdbc:mysql")){ driver = Messages.getString("jdbc_mysql_driver"); } else if (jdbcUrl.contains("jdbc:oracle")) { driver = Messages.getString("jdbc_oracle_driver"); } else if (jdbcUrl.contains("jdbc:sqlserver")) { driver = Messages.getString("jdbc_sqlserver_driver"); } else if (jdbcUrl.contains("jdbc:sybase:Tds")) { driver = Messages.getString("jdbc_sybase_driver"); } return driver; } /** * <p> * Description: [得到通用的jdbc链接] * </p> * Created by [ZYY] [2017-4-26] Modified by [修改人] [修改时间] * @param driver 数据库驱动 * @param JDBCurl 数据库链接串 * @param userName 数据库用户名 * @param password 数据库密码 * @return String 数据库Driver */ public Connection getConnection(String driver, String url, String userName, String password) { //定义链接 Connection connection = null; //加载数据库驱动 try { Class.forName(driver); } catch (ClassNotFoundException e) { e.printStackTrace(); System.out.println("The Driver loaded error,please contact to your Software Designer!"); } //得到数据库链接 try { Properties props =new Properties(); props.put("remarksReporting","true"); props.put("user", userName); props.put("password", password); connection = DriverManager.getConnection(url, props); //connection = DriverManager.getConnection(url, userName, password); } catch (SQLException e) { e.printStackTrace(); } return connection; } /** * <p> * Description: [关闭数据库相关链接] * </p> * Created by [ZYY] [2017-4-26] Modified by [修改人] [修改时间] * @param connection 数据库链接 * @param pStatement PreparedStatement链接 * @param ResultSet ResultSet连接 */ public void closeConnection(Connection connection, PreparedStatement pStatement, ResultSet resultSet){ try { if (resultSet != null) { resultSet.close(); } if (pStatement != null) { pStatement.close(); } if (connection != null) { connection.close(); } } catch (Exception e) { e.printStackTrace(); } }}
4.创建几个辅助类ColumnVO ,TableVO , TimedTaskListener
package com.scheduler.util;/** * <p> * Title: manageplatform_[大数据管理平台]_[表字段VO] * </p> * <p> * Description: [数据表VO描述平台使用表字段的结构信息,包括字段名称,备注和字段数据类型] * </p> * * @author ZYY * @version $Revision$ 2017年4月25日 * @author (lastest modification by $Author$) * */public class ColumnVO { private String columnName;//字段名称 private String comment; //字段备注 private String type; //字段数据类型 public ColumnVO(){ } public String getColumnName() { return columnName; } public void setColumnName(String columnName) { this.columnName = columnName; } public String getComment() { return comment; } public void setComment(String comment) { this.comment = comment; } public String getType() { return type; } public void setType(String type) { this.type = type; }}
package com.scheduler.util;/** * <p> * Title: manageplatform_[大数据管理平台]_[数据表VO] * </p> * <p> * Description: [数据表VO描述平台使用源表的结构信息,包括表名称,备注名称和表类型(视图,表)] * </p> * * @author lxf * * @author (lastest modification by $Author$) * */public class TableVO { private String tableName; //名称 private String comment; //备注 private String type; //类型 public TableVO(){ } public String getTableName() { return tableName; } public void setTableName(String tableName) { this.tableName = tableName; } public String getComment() { return comment; } public void setComment(String comment) { this.comment = comment; } public String getType() { return type; } public void setType(String type) { this.type = type; }}
package com.scheduler.util;import org.springframework.context.ApplicationListener;import org.springframework.context.event.ContextRefreshedEvent;import com.scheduler.service.DataSynMysqlAndHive;import java.text.ParseException;import java.text.SimpleDateFormat;import java.util.Date;import java.util.Timer;import java.util.TimerTask;/** * 系统启动的时候,开始轮询用户与表的截止时期线程 */public class TimedTaskListener extends TimerTask implements ApplicationListener<ContextRefreshedEvent> { private static Date polling_start_time = null; //轮询开始时间 private static Long polling_interval_time = 0L; //轮询间隔时间 private static SimpleDateFormat yMd = new SimpleDateFormat("yyyy-MM-dd"); private static SimpleDateFormat yMdHms = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); private static DataSynMysqlAndHive dataSynMysqlAndHive; //需要执行的逻辑代码,当spring容器初始化完成后就会执行该方法。 @Override public void onApplicationEvent(ContextRefreshedEvent event) { dataSynMysqlAndHive = event.getApplicationContext().getBean(DataSynMysqlAndHive.class); try { //轮询开始时间初始化 polling_start_time = yMdHms.parse(yMd.format(new Date())+" "+Messages.getString("polling_start_time")); if(polling_start_time.getTime() < new Date().getTime()){ //如果轮询开始时间小于项目启动时间 polling_start_time = TimeHelper.dateAddDay(polling_start_time,1);//则将轮询时间推迟一天 } polling_interval_time = Long.parseLong(Messages.getString("polling_interval_time"));//轮询间隔时间 } catch (ParseException e) { e.printStackTrace(); } //定时器执行 new Timer().schedule(new TimedTaskListener(),polling_start_time,polling_interval_time); } @Override public void run() { System.out.println("轮询处理,当前时间为:"+yMdHms.format(new Date()).toString()); System.out.println("轮询已经起作用了!"); String data1 = dataSynMysqlAndHive.importHiveData("baseline"); String data2 = dataSynMysqlAndHive.exportHiveData("baseline"); System.out.println("执行结果:"+data1+",,"+data2); /* accountService.checkAndDealAllUserDeadline(); //检查并处理所有过期用户 userDataCatalogTableService.checkAndDealUserTableDeadline();//检查并处理所有过期表*/ }}
5.下面对上面代码进行分析
调用该方法
public String importHiveData(String sourceTableName) //传入同步的表名,本同步基于单表,若多张表可以循环表数组代码中有备注,以importHiveData为起始方法,一个个调就可以
1.判断有没有数据更新
2.初始化sqoop客户端并且得到sqoop连接
3.创建sqoop任务,任务类型为导入任务
4.更新increment sqoop Job配置
5.启动线程监控sqoop采集时长
6.开始sqoop任务采集,并返回sqoop任务采集状态 sqoop采集是数据从MySQL-->HDFS中
7.采集成功后将数据写入到hive中
阅读全文
0 0
- Java api 调用Sqoop2进行MySQL-->Hive的数据同步
- java调用hive api
- Sqoop2 Java客户端API指南
- Sqoop2 Java客户端API指南
- 使用Java调用ElasticSearch提供的相关API进行数据搜索完整实例演示
- 使用Java调用ElasticSearch提供的相关API进行数据搜索完整实例演示
- 使用Java调用ElasticSearch提供的相关API进行数据搜索完整实例演示
- 使用Java调用ElasticSearch提供的相关API进行数据搜索完整实例演示
- 使用Java调用ElasticSearch提供的相关API进行数据搜索完整实例演示
- sqoop同步mysql数据到hive中
- java同步mysql数据
- 2.x的版本hive的api调用增加,删除,添加数据
- Mysql 的数据同步
- MySQL的数据同步
- Sqoop2 Rest API
- Java调用Hive的操作
- 用Sqoop2在Mysql和hadoop导入导出数据
- Sqoop2 从MySQL导入数据到Hadoop HDFS
- HFOI2017.9.14 复习赛02题解
- React Native之ReactJs生命周期(四)
- Lifecycle启动源码分析
- SSM框架(Spring+SpringMVC+MyBatis)详细搭建过程
- Jquery EasyUI 获取其他文件数据(4)
- Java api 调用Sqoop2进行MySQL-->Hive的数据同步
- DAT.GUI-初次了解
- # Software-eng lab 1
- Effective Java中文第二章第6节(个人渣翻)
- get_mainarea()详解
- jquery获取 屏幕宽度以及网页可用宽度 以及 给图片src赋值 div赋值
- 记录一个常用设计模式的理解
- JDK源码分析--多线程同步工具CountDownLatch类
- 存储过程详解