使用DBInputFormat作为输入数据源

来源:互联网 发布:阿里云服务器建站助手 编辑:程序博客网 时间:2024/05/18 02:22

该代码展示了如何在hadoop中与数据库连接,并将数据库中的数据作为数据源输入进行分析。

package com.ls.hadoopApp;import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import java.sql.PreparedStatement;import java.sql.ResultSet;import java.sql.SQLException;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.conf.Configured;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.Text;import org.apache.hadoop.io.Writable;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;import org.apache.hadoop.mapreduce.lib.db.DBWritable;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.Tool;import org.apache.hadoop.util.ToolRunner;/*** * 使用DBInputFormat作为输入, * 需要将驱动类放置到各个节点下,如报找不到驱动类错误则需要重启集群 */public class ConnectMysql extends Configured implements Tool{static String OUT_PATH = "hdfs://hadoop:9000/out";public static void main(String[] args) throws Exception{ToolRunner.run(new ConnectMysql(), args);}@Overridepublic int run(String[] arg0) throws Exception {Configuration conf = new Configuration();//设置连接所需要的驱动,连接地址,数据库名,用户名及密码DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver", "jdbc:mysql://hadoop:3306/lee", "root", "xiao");Job job = new Job(conf,ConnectMysql.class.getName());job.setJarByClass(ConnectMysql.class);job.setInputFormatClass(DBInputFormat.class);//设置与数据库中的表对应的的映射类、数据表以及表中的字段信息DBInputFormat.setInput(job, StudentInfo.class, "student", null, null, "id","name");job.setNumReduceTasks(0);job.setOutputKeyClass(Text.class);job.setOutputKeyClass(Text.class);FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));job.waitForCompletion(true);return 0;}}class ConnectMysqlMapper extends Mapper<Text,StudentInfo,Text,Text>{@Overrideprotected void map(Text key, StudentInfo value,Context context)throws IOException, InterruptedException {String arr[] = value.toString().split("\t");for(String val: arr){System.out.println(val);context.write(new Text(val), new Text(val));}}}/*** * 与数据库映射的类需要继承Writable和DBWritable接口 */class StudentInfo implements Writable,DBWritable{String id;String name;//int age;@Overridepublic void readFields(ResultSet result) throws SQLException {this.id = result.getString(1);this.name = result.getString(2);//this.age = result.getInt(3);}@Overridepublic void write(PreparedStatement prep) throws SQLException {prep.setString(1, id);prep.setString(2, name);//prep.setInt(3, age);}@Overridepublic void readFields(DataInput in) throws IOException {this.id = Text.readString(in);this.name = Text.readString(in);//this.age = in.readInt();}@Overridepublic void write(DataOutput out) throws IOException {Text.writeString(out, id);Text.writeString(out,name);//out.writeInt(age);}public String toString(){return id+"\t"+name;}}

0 0
原创粉丝点击