Integrating Hadoop with Hive: API Wrapper and Operations


First, the Maven dependencies:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.ta.hive</groupId>
    <artifactId>tg_hive</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>TG_hadoop_hive</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <junit.version>4.12</junit.version>
        <hbase.version>1.1.2</hbase.version>
        <hadoop.version>2.6.2</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
            <scope>runtime</scope>
            <exclusions>
                <exclusion>
                    <artifactId>jdk.tools</artifactId>
                    <groupId>jdk.tools</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>1.1.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-service</artifactId>
            <version>1.1.1</version>
            <exclusions>
                <exclusion>
                    <artifactId>eigenbase-properties</artifactId>
                    <groupId>eigenbase</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>pentaho-aggdesigner-algorithm</artifactId>
                    <groupId>org.pentaho</groupId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>
</project>

If the Maven dependency update fails with errors, some transitive dependencies are conflicting; exclude the conflicting artifacts (as done above for jdk.tools under hadoop-common and for eigenbase-properties and pentaho-aggdesigner-algorithm under hive-service) and the build will resolve.

Next, the API wrapper:

package com.tg.hadoop.hive;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * @author 汤高
 */
public class HiveAPI {
    // Many older posts use org.apache.hadoop.hive.jdbc.HiveDriver; that class name no longer works with newer Hive versions
    private static String driverName = "org.apache.hive.jdbc.HiveDriver";
    // Note the hive2 scheme: older examples use jdbc:hive://, which fails against HiveServer2 in recent versions
    private static String url = "jdbc:hive2://master:10000/default";
    private static String user = "hive";
    private static String password = "hive";
    private static String sql = "";

    public static ResultSet countData(Statement stmt, String tableName) {
        sql = "select count(1) from " + tableName;
        System.out.println("Running:" + sql);
        ResultSet res = null;
        try {
            res = stmt.executeQuery(sql);
            System.out.println("执行“regular hive query”运行结果:");
            while (res.next()) {
                System.out.println("count ------>" + res.getString(1));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return res;
    }

    public static ResultSet selectData(Statement stmt, String tableName) {
        sql = "select * from " + tableName;
        System.out.println("Running:" + sql);
        ResultSet res = null;
        try {
            res = stmt.executeQuery(sql);
            System.out.println("执行 select * query 运行结果:");
            while (res.next()) {
                System.out.println(res.getInt(1) + "\t" + res.getString(2));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return res;
    }

    public static boolean loadData(Statement stmt, String tableName, String filepath) {
        // Local path on the machine where Hive is installed (in my case, the home directory of that VM)
        sql = "load data local inpath '" + filepath + "' into table " + tableName;
        System.out.println("Running:" + sql);
        boolean result = false;
        try {
            result = stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

    public static ResultSet describeTables(Statement stmt, String tableName) {
        sql = "describe " + tableName;
        System.out.println("Running:" + sql);
        ResultSet res = null;
        try {
            res = stmt.executeQuery(sql);
            System.out.println("执行 describe table 运行结果:");
            while (res.next()) {
                System.out.println(res.getString(1) + "\t" + res.getString(2));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return res;
    }

    public static ResultSet showTables(Statement stmt, String tableName) {
        if (tableName == null) {
            sql = "show tables";
        } else {
            sql = "show tables '" + tableName + "'";
        }
        ResultSet res = null;
        try {
            res = stmt.executeQuery(sql);
            System.out.println("执行 show tables 运行结果:");
            while (res.next()) {
                System.out.println(res.getString(1));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return res;
    }

    public static boolean createTable(Statement stmt, String tableName) {
        sql = "create table " + tableName + " (key int, value string) row format delimited fields terminated by '\t'";
        boolean result = false;
        try {
            result = stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

    public static boolean dropTable(Statement stmt, String tableName) {
        // Table name to drop, e.g. "testHive"
        sql = "drop table " + tableName;
        boolean result = false;
        try {
            result = stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

    public static Connection getConn() {
        Connection conn = null;
        try {
            Class.forName(driverName);
            conn = DriverManager.getConnection(url, user, password);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return conn;
    }

    public static void close(Connection conn, Statement stmt) {
        try {
            // Close the statement before its connection
            if (stmt != null) {
                stmt.close();
                stmt = null;
            }
            if (conn != null) {
                conn.close();
                conn = null;
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
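Note that the driver class later in this post also calls HiveAPI.loadDataFromHDFS, which is not defined in the wrapper above. Judging from the SQL that appears in the log output (load data inpath without the local keyword), a minimal sketch of such a method, added inside HiveAPI, could look like this:

    /**
     * Load a file that already sits on HDFS into a Hive table.
     * Unlike loadData(...), this omits the LOCAL keyword, so Hive moves the
     * file from its current HDFS location into the table's data directory.
     */
    public static boolean loadDataFromHDFS(Statement stmt, String tableName, String filepath) {
        String sql = "load data inpath '" + filepath + "' into table " + tableName;
        System.out.println("Running:" + sql);
        boolean result = false;
        try {
            result = stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }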

Now the JUnit tests:

package com.tg.hive.test;

import static org.junit.Assert.*;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.junit.Before;
import org.junit.Test;

import com.tg.hadoop.hive.HiveAPI;

/**
 * @author 汤高
 */
public class TestHive {
    private Statement stmt = null;
    private Connection conn = null;

    @Before
    public void setConAndStatement() {
        conn = HiveAPI.getConn();
        try {
            stmt = conn.createStatement();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        assertNotNull(conn);
    }

    @Test
    public void testDropTable() {
        String tableName = "testhive";
        assertNotNull(HiveAPI.dropTable(stmt, tableName));
    }

    @Test
    public void testCreateTable() {
        boolean result = HiveAPI.createTable(stmt, "testhive");
        assertNotNull(result);
    }

    @Test
    public void testdescribeTables() {
        ResultSet res = HiveAPI.describeTables(stmt, "testhive");
        assertNotNull(res);
    }

    @Test
    public void testshowTables() {
        // ResultSet res = HiveAPI.showTables(stmt, "testhive");
        ResultSet res = HiveAPI.showTables(stmt, null);
        assertNotNull(res);
    }

    @Test
    public void testloadData() {
        boolean result = HiveAPI.loadData(stmt, "testhive", "user.txt");
        assertNotNull(result);
    }

    @Test
    public void testclose() {
        HiveAPI.close(conn, stmt);
    }

    @Test
    public void testSelectData() {
        ResultSet res = HiveAPI.selectData(stmt, "testhive");
        assertNotNull(res);
    }

    @Test
    public void testCountData() {
        ResultSet res = HiveAPI.countData(stmt, "testhive");
        assertNotNull(res);
    }
}
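One caveat about the tests: JUnit 4 does not guarantee the order in which test methods run, so testDropTable may execute before testCreateTable or testloadData and a test can fail for reasons unrelated to Hive. If a deterministic order is wanted, one option is JUnit's @FixMethodOrder (available since JUnit 4.11; the class and numbered method names below are a hypothetical sketch, not part of the original post):

import org.junit.FixMethodOrder;
import org.junit.Test;
import org.junit.runners.MethodSorters;

// Run test methods in lexicographic name order, so create/load/select/drop happen in sequence.
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
public class TestHiveOrdered {
    @Test public void t1_createTable() { /* HiveAPI.createTable(stmt, "testhive"); */ }
    @Test public void t2_loadData()    { /* HiveAPI.loadData(stmt, "testhive", "user.txt"); */ }
    @Test public void t3_selectData()  { /* HiveAPI.selectData(stmt, "testhive"); */ }
    @Test public void t4_dropTable()   { /* HiveAPI.dropTable(stmt, "testhive"); */ }
}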

The code above loads a file from the local filesystem into Hive.

When data is loaded into a Hive table from the local filesystem, it is actually first copied to a temporary directory on HDFS (typically under the uploading user's HDFS home directory, e.g. /home/hive/) and then moved (note: moved, not copied!) from that temporary directory into the Hive table's data directory. Given that, Hive naturally also supports moving data directly from a directory on HDFS into the table's data directory, i.e. load data inpath ... without the local keyword. Suppose the file /hive/user.txt already exists on HDFS; the concrete steps are as follows.

First, upload a user.txt file to HDFS; the example below expects it at /hive/user.txt.
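For reference, the table created by createTable is tab-delimited with (key int, value string) columns, so, judging from the query output shown later, user.txt would contain rows along the lines of:

1	tanggao
2	zhousiyuan

A typical way to place it on HDFS is the standard client, for example hdfs dfs -mkdir -p /hive followed by hdfs dfs -put user.txt /hive/ (the /hive path is the one assumed by the driver class below).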

import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

public class Hive {
    public static void main(String[] args) {
        Connection conn = null;
        Statement stmt = null;
        try {
            conn = HiveAPI.getConn();
            stmt = conn.createStatement();
            // HiveAPI.dropTable(stmt, "testhadoophive");
            // HiveAPI.createTable(stmt, "testhadoophive");
            HiveAPI.describeTables(stmt, "testhadoophive");
            HiveAPI.showTables(stmt, null);
            // Note: the file actually lives at "hdfs://192.168.52.140:9000/hive/user.txt",
            // but the path passed to Hive must be written simply as "/hive/user.txt"
            String path = "/hive/user.txt";
            HiveAPI.loadDataFromHDFS(stmt, "testhadoophive", path);
            HiveAPI.selectData(stmt, "testhadoophive");
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            // HiveAPI.close(conn, stmt);
        }
    }
}

Result: the file on HDFS was loaded into Hive successfully.

[27 21:39:29,613 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
Running:describe testhadoophive
[27 21:39:29,636 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 130
[27 21:39:29,679 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:29,695 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:29,712 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
[27 21:39:29,730 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 102
[27 21:39:29,733 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 273
执行 describe table 运行结果:
[27 21:39:29,781 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,787 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 179
[27 21:39:29,812 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: key int
[27 21:39:29,813 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: value   string
[27 21:39:29,813 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,814 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 138
[27 21:39:29,819 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 96
[27 21:39:29,823 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 42
[27 21:39:29,826 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 118
[27 21:39:29,833 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:29,833 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:29,849 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
[27 21:39:29,849 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 102
[27 21:39:29,850 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 131
执行 show tables 运行结果:
[27 21:39:29,850 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,853 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 171
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: tanggao
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: test
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: testhadoophive
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: testhive
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: testhive1
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: testhive3
[27 21:39:29,854 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,855 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 96
Running:load data inpath '/hive/user.txt' into table testhadoophive
[27 21:39:29,855 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 96
[27 21:39:29,858 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 42
[27 21:39:29,858 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 166
[27 21:39:29,888 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:29,888 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:30,233 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
Running:select * from testhadoophive
[27 21:39:30,233 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 96
[27 21:39:30,235 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 42
[27 21:39:30,235 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 135
[27 21:39:30,305 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:30,305 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:30,308 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
[27 21:39:30,308 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 102
[27 21:39:30,309 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 176
执行 select * query 运行结果:
[27 21:39:30,309 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:30,326 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 150
[27 21:39:30,331 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 1   tanggao
[27 21:39:30,331 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 2   zhousiyuan
[27 21:39:30,331 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:30,333 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 117

At this point, the user.txt you just uploaded to the /hive directory on HDFS has been moved to the HDFS path that Hive manages for the table; on my setup that is /user/hive/warehouse/.
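To confirm the move, you can list both locations with the HDFS client (table name and warehouse path as used above; adjust if your hive.metastore.warehouse.dir setting differs): hdfs dfs -ls /hive should no longer show user.txt, while hdfs dfs -ls /user/hive/warehouse/testhadoophive should now contain it.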

Please credit the source when reposting: http://blog.csdn.net/tanggao1314/article/details/51519503
