java调用API操作HDFS
来源:互联网 发布:minitab软件免费下载 编辑:程序博客网 时间:2024/05/18 15:07
java调用API操作HDFS
本文介绍Java调用API从hdfs读取数据
package mongodb;import java.io.BufferedReader;import java.io.IOException;import java.io.InputStreamReader;import java.net.URI;import java.util.Arrays;import org.apache.commons.lang.StringUtils;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FSDataInputStream;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.compress.CompressionCodec;import org.apache.hadoop.io.compress.CompressionCodecFactory;import org.apache.hadoop.io.compress.CompressionInputStream;import org.apache.hadoop.util.ReflectionUtils;class Item implements Comparable{String value;double weight;public Item(String v){value=v;weight = Double.parseDouble(value.split(":")[1]);}public int compareTo(Object o){ return this.weight == ((Item) o).weight ? 0 : (this.weight > ((Item) o).weight ? -1 : 1); } }public class BatchUpdateSim {public static String parse(String str){//String str="9000320718001:1.0,2077635:1.0,2053809:1.0";String[] fields=str.split("\t");String[] valueArray= fields[1].split(",");Item[] items = new Item[valueArray.length];for(int i=0;i<items.length;i++){items[i]=new Item(valueArray[i]);}Arrays.sort(items);for(int i=0;i<valueArray.length;i++){valueArray[i] = "{"+items[i].value+"}";}String valueStr = StringUtils.join(valueArray,",");return "{\"key\":"+fields[0]+",\"values\":["+valueStr+"]}";}public static void main(String[] args) throws IOException, ClassNotFoundException {// TODO Auto-generated method stubConfiguration conf = new Configuration (); conf.addResource("/usr/local/hadoop/conf/core-site.xml"); conf.addResource("/usr/local/hadoop/conf/hdfs-site.xml"); conf.addResource("/usr/local/hadoop/conf/mapred-site.xml");//String HDFS="hdfs://webdm-cluster";//String HDFS="hdfs://localhost:9000"; String HDFS=args[0];FileSystem hdfs = FileSystem.get(URI.create(HDFS),conf); String filePath=args[1];FSDataInputStream fin = hdfs.open(new Path(filePath));//CompressionCodecFactory factory = 
new CompressionCodecFactory(conf); //CompressionCodec codec = factory.getCodec(new Path(filePath)); //根据hdfs文件的后缀类型自动识别//Class<?> codecClass = Class.forName("com.hadoop.compression.lzo.LzoCodec");//CompressionCodec codec = (CompressionCodec)ReflectionUtils.newInstance(codecClass, conf); BufferedReader reader = null;String line;int count=0;RecsysDb db = RecsysDb.getInstance();String itemSimName=args[2];try {// if (codec == null)// { reader = new BufferedReader(new InputStreamReader(fin)); // in = new BufferedReader(new InputStreamReader(fin, "UTF-8")); // } /* else { System.out.println("识别出压缩类型"); CompressionInputStream comInputStream = codec.createInputStream(fin); reader = new BufferedReader(new InputStreamReader(comInputStream)); } */ while ((line = reader.readLine()) != null) {//if(count==0) System.out.println(line);String strJson=parse(line);db.updateItemSim(itemSimName, strJson);count++;if(count%1000==0)System.out.println("count:"+count);}} finally {if (reader != null) {reader.close();}System.out.println(count);}}}
下面是一个shell脚本。如果直接用java命令执行上面的程序,需要手动把各种Hadoop相关的jar包加入类路径。
(PS:后来发现有种更好的方法,就是用Hadoop命令执行,因为用Hadoop命令执行,Hadoop自己会加载一些jar包,无需自己再手动加载)
#!/bin/bash
# Build the colon-separated classpath of Hadoop (and MongoDB driver) jars that
# updateSim.jar needs when launched with plain `java` instead of `hadoop`.

hd_core="/usr/local/hadoop/hadoop-core-1.1.2.jar"
s4j="/usr/local/hadoop/lib/slf4j-log4j12-1.6.1.jar"
s4japi="/usr/local/hadoop/lib/slf4j-api-1.6.1.jar"
log4j="/usr/local/hadoop/lib/log4j-1.2.17.jar"
guva="/usr/local/hadoop/lib/guava-11.0.2.jar"
clog="/usr/local/hadoop/lib/commons-logging-1.1.1.jar"
cconf="/usr/local/hadoop/lib/commons-configuration-1.6.jar"
cl="/usr/local/hadoop/lib/commons-lang-2.5.jar"
ccli="/usr/local/hadoop/lib/commons-cli-1.2.jar"
protobuf="/usr/local/hadoop/lib/protobuf-java-2.4.0a.jar"
hdfs="/usr/local/hadoop/lib/hadoop-hdfs-1.1.2.jar"
mongodb="/data/home/liulian/linger/jars/mongo-java-driver-2.12.4.jar"

# FIX: the jars must be SEPARATE array elements (space-separated). The original
# `libs=($hd_core$s4j...)` concatenated all twelve paths into one unusable string.
libs=($hd_core $s4j $s4japi $log4j $guva $clog $cconf $cl $ccli $protobuf $hdfs $mongodb)

# Join the array elements with ':' into a Java classpath string.
libstr=""
for jarlib in "${libs[@]}"; do
    libstr=${jarlib}":"${libstr}
done
echo $libstr

# Args: <hdfsUri> <hdfsFilePath> <itemSimCollectionName>
java -Xbootclasspath/a:${libstr} -jar ../jars/updateSim.jar hdfs://10.200.91.164:9000 tv_sim/result/000000_0 tvItemsimColl
本文链接:http://blog.csdn.net/lingerlanlan/article/details/42178675
本文作者:linger
1 0
- java调用API操作HDFS
- Java API操作HDFS
- Java API操作HDFS
- hdfs java api操作
- java api操作HDFS
- HDFS JAVA API操作
- java 操作hdfs api
- java api 操作 hdfs
- Hadoop学习二(java api调用操作HDFS)
- 调用java api 访问HDFS
- JAVA操作HDFS API(hadoop)
- JAVA操作HDFS API(hadoop)
- HDFS的JAVA API操作
- JAVA API操作HDFS文件系统
- Java 封装 HDFS API 操作
- JAVA API操作HDFS文件系统
- java API 操作HDFS文件系统
- HDFS的JAVA API 操作
- VS2010 调用fopen函数读取bmp文件时,提示“CXX0030:错误,无法计算表达式的值,错误的指针”
- The connection to adb is down, and a severe error has occured.问题解决
- 求前缀表达式的值
- Redis存储系统-安装与测试
- bbblack sd卡内存问题
- java调用API操作HDFS
- 大学生感恩教育的内容、意义及教育体系构建
- python的httplib等包
- 内存分布图(X86)
- linux后端诊断与调试技术
- [水题][第一阶段-简单操作][HDOJ-2009]求数列的和
- python开发之图片转字符画
- 浅析为什么char类型的范围是 —128~+127
- 随笔