Writing MapReduce Programs with Hive


Experiment Topic

Writing MapReduce Programs with Hive

Experiment Requirements

Write Java code in Eclipse that:
connects to the Hive server from code;
creates a database from code;
creates a data table from code and loads data into it.

Experiment Steps

1. Set up the experiment environment and start all Hadoop services.
2. Start the Hive server.
After changing into the Hive_Home directory, start the server with bin/hive --service hiveserver.

3. Create a new project in Eclipse and add the required libraries.
The jar files to add are all jars under Hadoop_Home plus all jars under Hive_Home/lib.
In addition, Hive_Home/conf must also be added.
To do this, create a new conf folder inside the Java project, copy everything under Hive_Home/conf into it, and then add that folder to the project.

In the project's directory tree (screenshot omitted), the conf folder at the bottom is the newly created one; the files inside it were copied from Hive_Home/conf.
4. Write source code that connects to the Hive server (the code below uses Hive's Thrift client API).
import org.apache.hadoop.hive.service.ThriftHive;
import org.apache.hadoop.hive.service.ThriftHive.Client;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TSocket;

public class Hive_demo_01 {

    // Thrift socket kept as a field so main() can close it after use.
    static TSocket transport;

    // Open a Thrift socket to the Hive server and wrap it in a ThriftHive client.
    private static Client getClient(String hiveServer, Integer hivePort) {
        final int SOME_BIG_NUMBER = 99999999;   // generous socket timeout in milliseconds
        Client client = null;
        try {
            transport = new TSocket(hiveServer, hivePort);
            transport.setTimeout(SOME_BIG_NUMBER);
            transport.open();
            TBinaryProtocol protocol = new TBinaryProtocol(transport);
            client = new ThriftHive.Client(protocol);
            System.out.println("Connection is established");
            return client;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    public static void main(String[] args) {
        String HIVE_SERVER = "localhost";
        Integer HIVE_PORT = Integer.valueOf(10000);   // default HiveServer port
        Client client = getClient(HIVE_SERVER, HIVE_PORT);
        transport.close();
    }
}
If the program runs without errors, it prints "Connection is established", which shows that the connection succeeded.
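The program above talks to Hive through its Thrift client API (ThriftHive.Client). The experiment requirement mentions JDBC, and Hive also ships a JDBC driver that wraps the same server. The sketch below is not part of the original lab: the class name HiveJdbcDemo is invented here, and it assumes the original HiveServer started with bin/hive --service hiveserver on port 10000, which is accessed through the org.apache.hadoop.hive.jdbc.HiveDriver driver class.

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    // Hypothetical example class, not from the original post.
    public class HiveJdbcDemo {
        public static void main(String[] args) throws Exception {
            // Driver and URL for the original HiveServer; HiveServer2 would instead use
            // org.apache.hive.jdbc.HiveDriver with a "jdbc:hive2://..." URL.
            Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
            Connection con = DriverManager.getConnection("jdbc:hive://localhost:10000/default", "", "");
            Statement stmt = con.createStatement();

            // Run a simple statement and print every row it returns.
            ResultSet rs = stmt.executeQuery("show tables");
            while (rs.next()) {
                System.out.println(rs.getString(1));
            }

            rs.close();
            stmt.close();
            con.close();
        }
    }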
5. Create the jdbc_demo database from source code.
Database-creation code and the resulting terminal output: (screenshots omitted).
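Since this step appears only as screenshots, here is a minimal sketch of how it could be done; it is not the author's original code (the class name Hive_create_db_demo is invented) and simply reuses the same Thrift connection pattern as step 4 to create the jdbc_demo database, then lists the databases to confirm it exists.

    import java.util.List;

    import org.apache.hadoop.hive.service.ThriftHive;
    import org.apache.hadoop.hive.service.ThriftHive.Client;
    import org.apache.thrift.protocol.TBinaryProtocol;
    import org.apache.thrift.transport.TSocket;

    // Hypothetical example class mirroring the step 4 connection code.
    public class Hive_create_db_demo {
        public static void main(String[] args) throws Exception {
            // Connect to the Hive server exactly as in getClient() from step 4.
            TSocket transport = new TSocket("localhost", 10000);
            transport.setTimeout(99999999);
            transport.open();
            Client client = new ThriftHive.Client(new TBinaryProtocol(transport));

            // Create the jdbc_demo database, then list databases to confirm it exists.
            client.execute("create database jdbc_demo");
            client.execute("show databases");
            List<String> dbs = client.fetchAll();
            for (String db : dbs) {
                System.out.println(db);
            }
            transport.close();
        }
    }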
6. Write source code to create the sample_data table and load data into it.
package com.hive.demo;

import java.util.List;

import org.apache.hadoop.hive.service.HiveServerException;
import org.apache.hadoop.hive.service.ThriftHive;
import org.apache.hadoop.hive.service.ThriftHive.Client;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.transport.TSocket;

public class Hive_demo_01 {

    // Thrift socket kept as a field so main() can close it after use.
    static TSocket transport;

    // Open a Thrift socket to the Hive server and wrap it in a ThriftHive client.
    private static Client getClient(String hiveServer, Integer hivePort) {
        final int SOME_BIG_NUMBER = 99999999;   // generous socket timeout in milliseconds
        Client client = null;
        try {
            transport = new TSocket(hiveServer, hivePort);
            transport.setTimeout(SOME_BIG_NUMBER);
            transport.open();
            TBinaryProtocol protocol = new TBinaryProtocol(transport);
            client = new ThriftHive.Client(protocol);
            System.out.println("Connection is established");
            return client;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    // Run "show tables" and print the returned table names.
    private Client show_tables(Client c1) {
        try {
            c1.execute("show tables");
            List<String> li = c1.fetchAll();
            System.out.println(" *** The Tables List *** ");
            for (String string : li) {
                System.out.println(string);
            }
            System.out.println(" -------------------------------- ");
        } catch (TException e) {
            e.printStackTrace();
        }
        return c1;
    }

    // Create the sample_data table with a single string column, stored as a text file.
    private Client create_tables(Client c1) {
        try {
            c1.execute("create table sample_data (name string) stored as textfile");
        } catch (HiveServerException e) {
            e.printStackTrace();
        } catch (TException e) {
            e.printStackTrace();
        }
        System.out.println(" *** sample_data table is created *** ");
        System.out.println(" -------------------------------- ");
        return c1;
    }

    // Load a local text file into the given table.
    private Client load_data(Client c1, String tbl_name) {
        try {
            c1.execute("load data local inpath '/home/wcbdd/Desktop/week5/sample.txt' into table " + tbl_name);
        } catch (HiveServerException e) {
            e.printStackTrace();
        } catch (TException e) {
            e.printStackTrace();
        }
        System.out.println(" *** loaded data into " + tbl_name + " *** ");
        System.out.println(" -------------------------------- ");
        return c1;
    }

    public static void main(String[] args) {
        String HIVE_SERVER = "localhost";
        Integer HIVE_PORT = Integer.valueOf(10000);   // default HiveServer port
        Client client = getClient(HIVE_SERVER, HIVE_PORT);

        Hive_demo_01 obj = new Hive_demo_01();
        client = obj.show_tables(client);
        System.out.println(" Before Creating the table sample_data ");
        client = obj.create_tables(client);
        System.out.println(" After Creating the table sample_data ");
        client = obj.show_tables(client);
        System.out.println(" loading data into sample_data ");
        client = obj.load_data(client, "sample_data");
        transport.close();
    }
}
Change the path in the load statement to the correct path of your data file.
Terminal window output: (screenshot omitted).
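Since the output screenshot is not reproduced here, one optional way to check the load (a sketch, not part of the original lab) is to query the table through the same client inside main(), just before transport.close():

    // Optional verification sketch: print the rows loaded into sample_data.
    try {
        client.execute("select * from sample_data");
        for (String row : client.fetchAll()) {
            System.out.println(row);
        }
    } catch (TException e) {
        e.printStackTrace();
    }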

Summary

This week I learned two ways of working with Hive, which are essentially very similar under the hood.
I still need to summarize the commonly used Hive commands.