Hadoop Programming: Operating on a MySQL Database with MapReduce


First, create the MySQL database and table. In this example the table is named tb and has four columns: id, name, count, and no.
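A minimal JDBC sketch for creating such a table is below; the column types and sizes are assumptions, so adjust them to your data:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class CreateTable {
  public static void main(String[] args) throws Exception {
    Class.forName("com.mysql.jdbc.Driver");
    Connection conn = DriverManager.getConnection(
        "jdbc:mysql://localhost/test", "root", "111111");
    Statement stmt = conn.createStatement();
    // Column names match the fields the job reads below; the types are assumed
    stmt.executeUpdate("CREATE TABLE IF NOT EXISTS tb ("
        + "id INT PRIMARY KEY, "
        + "name VARCHAR(100), "
        + "count INT, "
        + "no INT)");
    stmt.close();
    conn.close();
  }
}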



Next comes the MapReduce code. The MySQL JDBC driver jar must be on the job's classpath (for example, placed in $HADOOP_HOME/lib or passed with -libjars).

First, define a custom record class that implements both Writable and DBWritable; here it is called CourceRecord:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.lib.db.DBWritable;

public class CourceRecord implements Writable, DBWritable {
  int id;
  String name;
  int count;
  int no;
  
  // Deserialize the fields from the DataInput stream (Writable side)
  @Override
  public void readFields(DataInput in) throws IOException {
    this.id = in.readInt();
    this.name = Text.readString(in);
    this.count = in.readInt();
    this.no = in.readInt();
  }
  // Serialize the fields to the DataOutput stream (Writable side)
  @Override
  public void write(DataOutput out) throws IOException {
    out.writeInt(this.id);
    Text.writeString(out, this.name);
    out.writeInt(this.count);
    out.writeInt(this.no);
  }
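  // Populate the fields from a row of the JDBC ResultSet (DBWritable side)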
  @Override
  public void readFields(ResultSet result) throws SQLException {
    this.id = result.getInt(1);
    this.name = result.getString(2);
    this.count = result.getInt(3);
    this.no = result.getInt(4);
  }
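  // Bind the fields to the PreparedStatement used to write rows back (DBWritable side)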
  @Override
  public void write(PreparedStatement stmt) throws SQLException {
    stmt.setInt(1, this.id);
    stmt.setString(2, this.name);
    stmt.setInt(3, this.count);
    stmt.setInt(4, this.no);
  }
  @Override
  public String toString() {
    return this.name + " " + this.count + " " + this.no;
  }
}
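Both interfaces are needed: the Writable methods handle Hadoop's own serialization of the record between map and reduce tasks, while the DBWritable methods map the record onto table columns for DBInputFormat and DBOutputFormat.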

------------------------------------------------Mapper class-----------------------------------

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

public class DBMapper extends MapReduceBase implements
    Mapper<LongWritable, CourceRecord, LongWritable, Text> {
  // key is the record index supplied by DBInputFormat; value is the
  // deserialized row. Re-emit the row's id as key and its text form as value.
  public void map(LongWritable key, CourceRecord value,
      OutputCollector<LongWritable, Text> collector, Reporter reporter)
      throws IOException {
    collector.collect(new LongWritable(value.id), new Text(value.toString()));
  }
}

--------------------------------------Driver class DB------------------

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;
import org.apache.hadoop.mapred.lib.db.DBInputFormat;

public class DB {
  public static void main(String[] args) throws IOException {
    // JDBC URL, user name, and password
    String[] argc = { "jdbc:mysql://localhost/test", "root", "111111" };

    try {
      JobConf conf = new JobConf(DB.class);
      // Load the MySQL JDBC driver and point the job at the database
      Class.forName("com.mysql.jdbc.Driver");
      DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver", argc[0], argc[1], argc[2]);
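      // Read the listed columns from table tb: no WHERE condition (null), ordered by id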
      String[] fields = { "id", "name", "count", "no" };
      DBInputFormat.setInput(conf, CourceRecord.class, "tb", null,"id", fields);
      
      // Input records come from the database rather than from files on HDFS
      conf.setInputFormat(DBInputFormat.class);
      conf.setOutputKeyClass(LongWritable.class);
      conf.setOutputValueClass(Text.class);
  
      
      // Write the results as plain text to the DBOUTPUT directory on HDFS
      Path path = new Path("DBOUTPUT");
      FileOutputFormat.setOutputPath(conf, path);
      
      conf.setMapperClass(DBMapper.class);
      // IdentityReducer simply passes the mapper's key/value pairs through
      conf.setReducerClass(IdentityReducer.class);
      // Delete the output directory if it already exists
      FileSystem hdfs = path.getFileSystem(conf);
      if (hdfs.exists(path)) {
        hdfs.delete(path, true);
      }
      
      JobClient.runJob(conf);
    }
    catch (ClassNotFoundException e) {
      System.err.println("com.mysql.jdbc.Driver not found");
    }
  }
}
------------------------------------------------------

Running the job produces a part-00000 file in the DBOUTPUT directory with the following contents:

17 hello 28 17
18 hadoop 28 18
19 java 28 19
20 hive 16 20
21 hbase 18 21
22 pig 18 22

The example runs; the key part is the database configuration. The relevant setup methods are:

DBConfiguration.configureDB(JobConf job, String driverClass, String dbUrl, String userName, String passwd)

DBInputFormat.setInput(JobConf job, Class<? extends DBWritable> inputClass, String tableName, String conditions, String orderBy, String... fieldNames)

DBOutputFormat.setOutput(JobConf job, String tableName, String... fieldNames)

e.g. DBOutputFormat.setOutput(job, "tb", "id", "name", "count", "no");
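
The job above only reads from the database. For the write path, here is a rough configuration sketch, assuming a hypothetical output table tb_out with the same four columns (the input side and the reducer that emits CourceRecord keys are omitted):

// DBOutputFormat and DBConfiguration are in org.apache.hadoop.mapred.lib.db;
// NullWritable is in org.apache.hadoop.io
JobConf conf = new JobConf(DB.class);
DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
    "jdbc:mysql://localhost/test", "root", "111111");
// DBOutputFormat writes the reduce output keys, which must implement
// DBWritable; CourceRecord qualifies. Values are ignored, so NullWritable works.
conf.setOutputFormat(DBOutputFormat.class);
conf.setOutputKeyClass(CourceRecord.class);
conf.setOutputValueClass(NullWritable.class);
DBOutputFormat.setOutput(conf, "tb_out", "id", "name", "count", "no");

Each CourceRecord emitted as a reduce output key then becomes one row inserted into tb_out.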

