Eclipse环境下hive测试

来源:互联网 发布:类似于facerig的软件 编辑:程序博客网 时间:2024/06/06 01:35

本文所做示例是将hive表格中的数据读取出来并存入mysql数据库。前提是以mysql作为hive的元数据库已经调通,因为连接mysql数据库的时候所用的数据库,用户名和密码都要hive的配置文件一致。

1、在eclipse下建立工程,并将1)hive中lib目录下的所有包都导入到该项目。2)将hadoop中的相关包导入。在hadoop-2.x以前只要导入hadoop-xxx-core.jar就可以,hadoop-2.x以后的版本比较蛋疼,被分散在hadoop安装目录的share/hadoop/下的各个子目录里,以防万一全部导入。3)导入mysql的jdbc驱动包。

2、该项目第一个文件叫做ConnectDB,里面两个方法getHiveConnection和getMysqlConnection,分别返回连接到hive或者mysql数据库的一个connection。主要是在Class.forName中导入驱动文件和在getConnection获取连接的时候要对应相应的数据库,要与hive中的配置文件保持一致。

package com.scutcv.mrshen;import java.sql.Connection;import java.sql.DriverManager;import java.sql.SQLException;import org.apache.hadoop.hive.ql.parse.HiveParser_IdentifiersParser.nullCondition_return;import org.codehaus.groovy.ast.stmt.ThrowStatement;public class ConnectDB {private static Connection connection = null;private static Connection connection2mysql = null;private ConnectDB() {}public static Connection getHiveConnection() throws SQLException {if (connection == null) {try {Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");} catch (ClassNotFoundException e) {// TODO: handle exceptione.printStackTrace();System.exit(1);}connection = DriverManager.getConnection("jdbc:hive://master:50031/default","hive","hive");System.out.println("to hive: " + connection.toString());}return connection;}public static Connection getMysqlConnection() throws SQLException {if (connection2mysql == null) {try {Class.forName("com.mysql.jdbc.Driver");} catch (ClassNotFoundException e) {// TODO: handle exceptione.printStackTrace();System.exit(1);}connection2mysql = DriverManager.getConnection("jdbc:mysql://master:3306/hive?createDataBaseIfNotExist","hive","hive");System.out.println("to mysql: " + connection2mysql.toString());}return connection2mysql;}public static void closeHive() throws SQLException {if (connection != null) {connection.close();}}public static void closeMysql() throws SQLException {if (connection2mysql != null) {connection2mysql.close();}}}


3、建立工具类hiveUtil.java文件,里面包含了各种测试所要用到方法,比如建立表格,导入数据,查询并存入mysql数据库,打印结果等。

package com.scutcv.mrshen;import java.sql.Connection;import java.sql.ResultSet;import java.sql.SQLException;import java.sql.Statement;public class hiveUtil {public static void createTablseOrloadData(String hql)throws SQLException {Connection connection = ConnectDB.getHiveConnection();Statement statement = connection.createStatement();ResultSet resultSet = statement.executeQuery(hql);}public static ResultSet queryHive(String hql) throws SQLException {Connection connection = ConnectDB.getHiveConnection();Statement statement = connection.createStatement();ResultSet resultSet = statement.executeQuery(hql);return resultSet;}public static void hive2Mysql(ResultSet resultSet) throws SQLException {Connection connection = ConnectDB.getMysqlConnection();Statement statement = connection.createStatement();while(resultSet.next()) {/*int cid  = resultSet.getInt(1);int type = resultSet.getInt(3);int aid  = resultSet.getInt(5);String time    = resultSet.getString(2);String adv    = resultSet.getString(4);String conutry = resultSet.getString(6);String prov    = resultSet.getString(7);String city    = resultSet.getString(8);String snp     = resultSet.getString(9);String ptype   = resultSet.getString(10);String phone   = resultSet.getString(11);int stateCode = statement.executeUpdate(getSQLString(cid, time, type, adv, aid, conutry, prov, city, snp, ptype, phone));*/int key = resultSet.getInt(1);String val = resultSet.getString(2);int stateCode = statement.executeUpdate("insert into KVtest values(" + key + ",'" + val + "')");}}public static String getSQLString(int cid,   String time,   int type,   String adv,   int aid,   String country,   String prov,   String city,   String snp,   String ptype,   String phone) {String sql = "insert into dmining values(" + cid + "," + time + "," + type + "," + adv + "," + aid + "," +country + "," + prov + "," + city + "," + snp + "," + ptype + "," + phone + ")";return sql;}public static void printQueryResult(ResultSet resultSet) throws SQLException {while(resultSet.next()) {int cid  = resultSet.getInt(1);int type = resultSet.getInt(3);int aid  = resultSet.getInt(5);String time    = resultSet.getString(2);String adv    = resultSet.getString(4);String country = resultSet.getString(6);String prov    = resultSet.getString(7);String city    = resultSet.getString(8);String snp     = resultSet.getString(9);String ptype   = resultSet.getString(10);String phone   = resultSet.getString(11);System.out.println("cid: " + cid);System.out.println("time: " + time);System.out.println("ad_type: " + type);System.out.println("advertisment: " + adv);System.out.println("aid: " + aid);System.out.println("country: " + country);System.out.println("province: " + prov);System.out.println("city: " + city);System.out.println("service network provider: " + snp);System.out.println("phone_type: " + ptype);System.out.println("phone: " + phone + '\n');}}}

其中:

1)由于创建表格和导入数据逻辑步骤完全一样,只是查询的语句有区别,因此我把他们合并写作一个方法createTablesOrloadData。

2)queryHive方法返回hive的查询结果

3)hive2Mysql方法将方法2)中的结果通过迭代读取并插入mysql中的数据库。由于一开始我建立的表格要插入的数据含有中文导致乱码,因为只是测试,所以新加了printQueryResult方法将查询结果简单打印出来。为了节约时间我在hive中新建了表格kv2,并将hive的测试用例导入,因此注释掉的部分忽略不计。值得注意的是,在返回的查询结果resultSet中,取数据的时候下标是从1而不是从0开始的,一开始我从0读取,结果报了数据越界的异常。如下图。



4、新建测试文件execHive.java
package com.scutcv.mrshen;import java.sql.ResultSet;import java.sql.SQLException;public class execHive {public static void  main(String[] args) throws SQLException {ResultSet resultSet = hiveUtil.queryHive("select * from kv2");hiveUtil.hive2Mysql(resultSet);//hiveUtil.printQueryResult(resultSet);//String createHql = "create table kv2(id INT, name string)";//hiveUtil.createTablseOrloadData(createHql);//String loadHql = "load data local inpath '/home/hduser/hive-0.13.1/examples/files/kv2.txt' overwrite into table kv2";//hiveUtil.createTablseOrloadData(loadHql);ConnectDB.closeHive();ConnectDB.closeMysql();}}
上述是将hive的查询数据写入mysql的测试,另外注释掉的还有在hive中创建kv2,导入数据等。按需自取。在运行execHive之前必须将hive服务开起来,指定的端口号要和ConnectDB中获取hive连接的配置一致。



参考:实战Hadoop——开启通向云计算的捷径_刘鹏

0 0
原创粉丝点击