Integrating Hadoop with Hive: Wrapping and Using the Hive JDBC API
First, the Maven dependencies:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.ta.hive</groupId>
    <artifactId>tg_hive</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>TG_hadoop_hive</name>
    <url>http://maven.apache.org</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <junit.version>4.12</junit.version>
        <hbase.version>1.1.2</hbase.version>
        <hadoop.version>2.6.2</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
            <scope>runtime</scope>
            <exclusions>
                <exclusion>
                    <artifactId>jdk.tools</artifactId>
                    <groupId>jdk.tools</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>1.1.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-service</artifactId>
            <version>1.1.1</version>
            <exclusions>
                <exclusion>
                    <artifactId>eigenbase-properties</artifactId>
                    <groupId>eigenbase</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>pentaho-aggdesigner-algorithm</artifactId>
                    <groupId>org.pentaho</groupId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>
</project>
If Maven reports errors when resolving these dependencies, you most likely have a jar conflict; exclude the conflicting artifact, as is done for hive-service and hadoop-common above. Running mvn dependency:tree can help you find which transitive dependency is causing the clash.
Next, the API wrapper:
package com.tg.hadoop.hive;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

/**
 * A thin wrapper around the Hive JDBC interface.
 *
 * @author 汤高
 */
public class HiveAPI {
    // Many older posts use org.apache.hadoop.hive.jdbc.HiveDriver; that class name no longer works with newer Hive releases.
    private static String driverName = "org.apache.hive.jdbc.HiveDriver";
    // Note the hive2 scheme: this connects to HiveServer2. The plain "jdbc:hive://" URL fails on newer versions.
    private static String url = "jdbc:hive2://master:10000/default";
    private static String user = "hive";
    private static String password = "hive";

    public static ResultSet countData(Statement stmt, String tableName) {
        String sql = "select count(1) from " + tableName;
        System.out.println("Running:" + sql);
        ResultSet res = null;
        try {
            res = stmt.executeQuery(sql);
            System.out.println("Result of the regular Hive query:");
            while (res.next()) {
                System.out.println("count ------>" + res.getString(1));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return res;
    }

    public static ResultSet selectData(Statement stmt, String tableName) {
        String sql = "select * from " + tableName;
        System.out.println("Running:" + sql);
        ResultSet res = null;
        try {
            res = stmt.executeQuery(sql);
            System.out.println("Result of the select * query:");
            while (res.next()) {
                System.out.println(res.getInt(1) + "\t" + res.getString(2));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return res;
    }

    public static boolean loadData(Statement stmt, String tableName, String filepath) {
        // Local path on the machine running HiveServer2; in my setup this is the home directory of the VM where Hive is installed.
        String sql = "load data local inpath '" + filepath + "' into table " + tableName;
        System.out.println("Running:" + sql);
        boolean result = false;
        try {
            result = stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

    public static ResultSet describeTables(Statement stmt, String tableName) {
        String sql = "describe " + tableName;
        System.out.println("Running:" + sql);
        ResultSet res = null;
        try {
            res = stmt.executeQuery(sql);
            System.out.println("Result of describe table:");
            while (res.next()) {
                System.out.println(res.getString(1) + "\t" + res.getString(2));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return res;
    }

    public static ResultSet showTables(Statement stmt, String tableName) {
        // A null table name lists all tables; otherwise only tables matching the given pattern are shown.
        String sql;
        if (tableName == null) {
            sql = "show tables";
        } else {
            sql = "show tables '" + tableName + "'";
        }
        ResultSet res = null;
        try {
            res = stmt.executeQuery(sql);
            System.out.println("Result of show tables:");
            while (res.next()) {
                System.out.println(res.getString(1));
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return res;
    }

    public static boolean createTable(Statement stmt, String tableName) {
        String sql = "create table " + tableName
                + " (key int, value string) row format delimited fields terminated by '\t'";
        boolean result = false;
        try {
            result = stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

    public static boolean dropTable(Statement stmt, String tableName) {
        String sql = "drop table " + tableName;
        boolean result = false;
        try {
            result = stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }

    public static Connection getConn() {
        Connection conn = null;
        try {
            Class.forName(driverName);
            conn = DriverManager.getConnection(url, user, password);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return conn;
    }

    public static void close(Connection conn, Statement stmt) {
        try {
            // Close the Statement before the Connection.
            if (stmt != null) {
                stmt.close();
            }
            if (conn != null) {
                conn.close();
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }
}
Now the JUnit tests:
package com.tg.hive.test;

import static org.junit.Assert.*;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

import org.junit.Before;
import org.junit.Test;

import com.tg.hadoop.hive.HiveAPI;

/**
 * @author 汤高
 */
public class TestHive {
    private Statement stmt = null;
    private Connection conn = null;

    @Before
    public void setConAndStatement() {
        conn = HiveAPI.getConn();
        try {
            stmt = conn.createStatement();
        } catch (SQLException e) {
            e.printStackTrace();
        }
        assertNotNull(conn);
    }

    @Test
    public void testDropTable() {
        String tableName = "testhive";
        assertNotNull(HiveAPI.dropTable(stmt, tableName));
    }

    @Test
    public void testCreateTable() {
        boolean result = HiveAPI.createTable(stmt, "testhive");
        assertNotNull(result);
    }

    @Test
    public void testdescribeTables() {
        ResultSet res = HiveAPI.describeTables(stmt, "testhive");
        assertNotNull(res);
    }

    @Test
    public void testshowTables() {
        //ResultSet res = HiveAPI.showTables(stmt, "testhive");
        ResultSet res = HiveAPI.showTables(stmt, null);
        assertNotNull(res);
    }

    @Test
    public void testloadData() {
        boolean result = HiveAPI.loadData(stmt, "testhive", "user.txt");
        assertNotNull(result);
    }

    @Test
    public void testclose() {
        HiveAPI.close(conn, stmt);
    }

    @Test
    public void testSelectData() {
        ResultSet res = HiveAPI.selectData(stmt, "testhive");
        assertNotNull(res);
    }

    @Test
    public void testCountData() {
        ResultSet res = HiveAPI.countData(stmt, "testhive");
        assertNotNull(res);
    }
}
The code above loads a file from the local file system into Hive.
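For reference, user.txt is a tab-separated file that matches the (key int, value string) schema created above. Judging from the select * output later in this post, its contents look roughly like this (illustrative, not the original file):

1	tanggao
2	zhousiyuan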
When data is loaded into a Hive table from the local file system, Hive actually copies it first to a temporary directory on HDFS (typically under the uploading user's HDFS home directory, e.g. /home/hive/), and then moves it (note: moves, not copies!) from that temporary directory into the Hive table's data directory. Given that, Hive naturally also supports moving data directly from a directory on HDFS into the corresponding table's data directory. Suppose the file /hive/user.txt already exists on HDFS; the concrete steps are as follows.
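The driver program below calls HiveAPI.loadDataFromHDFS, a method not shown in the HiveAPI class above. Here is a minimal sketch of it, assuming it simply mirrors loadData but omits the LOCAL keyword, so that Hive moves the file that is already on HDFS instead of uploading a local one (the log output at the end of the post shows exactly this statement being executed):

    // Added to HiveAPI: load a file that already resides on HDFS into a Hive table.
    // Without LOCAL, Hive moves the HDFS file into the table's warehouse directory.
    public static boolean loadDataFromHDFS(Statement stmt, String tableName, String filepath) {
        String sql = "load data inpath '" + filepath + "' into table " + tableName;
        System.out.println("Running:" + sql);
        boolean result = false;
        try {
            result = stmt.execute(sql);
        } catch (SQLException e) {
            e.printStackTrace();
        }
        return result;
    }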
First, upload a user.txt file to the /hive directory on HDFS, so that it is available at /hive/user.txt.
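You can do the upload from the command line with hdfs dfs -put, or programmatically with Hadoop's FileSystem API. The sketch below is only an illustration: the NameNode address hdfs://192.168.52.140:9000 is taken from the comment in the driver program further down, and the local source path is an assumption.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class UploadUserFile {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // NameNode address of the cluster (from the driver program's comment below)
        conf.set("fs.defaultFS", "hdfs://192.168.52.140:9000");
        FileSystem fs = FileSystem.get(conf);
        // Copy the local user.txt into the /hive directory on HDFS
        fs.copyFromLocalFile(new Path("user.txt"), new Path("/hive/user.txt"));
        fs.close();
    }
}

With the file in place on HDFS, the driver program looks like this: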
import java.sql.Connection;
import java.sql.SQLException;
import java.sql.Statement;

import com.tg.hadoop.hive.HiveAPI;

public class Hive {
    public static void main(String[] args) {
        Connection conn = null;
        Statement stmt = null;
        try {
            conn = HiveAPI.getConn();
            stmt = conn.createStatement();
            //HiveAPI.dropTable(stmt, "testhadoophive");
            //HiveAPI.createTable(stmt, "testhadoophive");
            HiveAPI.describeTables(stmt, "testhadoophive");
            HiveAPI.showTables(stmt, null);
            // Note: the full location is actually "hdfs://192.168.52.140:9000/hive/user.txt",
            // but only the path "/hive/user.txt" should be passed here.
            String path = "/hive/user.txt";
            HiveAPI.loadDataFromHDFS(stmt, "testhadoophive", path);
            HiveAPI.selectData(stmt, "testhadoophive");
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            //HiveAPI.close(conn, stmt);
        }
    }
}
Result: the HDFS file was imported into Hive successfully.
[27 21:39:29,613 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
Running:describe testhadoophive
[27 21:39:29,636 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 130
[27 21:39:29,679 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:29,695 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:29,712 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
[27 21:39:29,730 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 102
[27 21:39:29,733 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 273
Result of describe table:
[27 21:39:29,781 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,787 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 179
[27 21:39:29,812 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: key int
[27 21:39:29,813 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: value string
[27 21:39:29,813 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,814 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 138
[27 21:39:29,819 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 96
[27 21:39:29,823 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 42
[27 21:39:29,826 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 118
[27 21:39:29,833 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:29,833 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:29,849 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
[27 21:39:29,849 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 102
[27 21:39:29,850 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 131
Result of show tables:
[27 21:39:29,850 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,853 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 171
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: tanggao
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: test
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: testhadoophive
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: testhive
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: testhive1
[27 21:39:29,854 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: testhive3
[27 21:39:29,854 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:29,855 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 96
Running:load data inpath '/hive/user.txt' into table testhadoophive
[27 21:39:29,855 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 96
[27 21:39:29,858 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 42
[27 21:39:29,858 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 166
[27 21:39:29,888 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:29,888 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:30,233 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
Running:select * from testhadoophive
[27 21:39:30,233 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 96
[27 21:39:30,235 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 42
[27 21:39:30,235 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 135
[27 21:39:30,305 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 109
[27 21:39:30,305 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 100
[27 21:39:30,308 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 53
[27 21:39:30,308 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 102
[27 21:39:30,309 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 176
Result of the select * query:
[27 21:39:30,309 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:30,326 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 150
[27 21:39:30,331 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 1 tanggao
[27 21:39:30,331 DEBUG] org.apache.hive.jdbc.HiveQueryResultSet - Fetched row string: 2 zhousiyuan
[27 21:39:30,331 DEBUG] org.apache.thrift.transport.TSaslTransport - writing data length: 112
[27 21:39:30,333 DEBUG] org.apache.thrift.transport.TSaslTransport - CLIENT: reading data length: 117
At this point the user.txt you just uploaded to the /hive directory on HDFS has been moved to the HDFS location Hive manages for the table; in my setup that is /user/hive/warehouse/.
When reposting, please credit the original source: http://blog.csdn.net/tanggao1314/article/details/51519503