Hadoop: Reading and Writing HDFS with the Java API, Built with Maven


1. Read/Write Source Code

Pay attention to the commented lines!
```java
/**
 * App.java
 */
package com.jiecxy;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;

public class App {
    public static void main(String[] args) {
        try {
            Configuration conf = new Configuration();
            // Without this line you get: java.io.IOException: No FileSystem for scheme: hdfs
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");

            String filePath = "hdfs://127.0.0.1:9000/test/test.txt";
            Path path = new Path(filePath);

            // The URI must be passed here, otherwise you get:
            // java.lang.IllegalArgumentException: Wrong FS: hdfs://127.0.0.1:9000/test/test.txt, expected: file:///
            FileSystem fs = FileSystem.get(new URI(filePath), conf);

            System.out.println("READING ============================");
            FSDataInputStream is = fs.open(path);
            BufferedReader br = new BufferedReader(new InputStreamReader(is));
            // This example only reads a single line
            String content = br.readLine();
            System.out.println(content);
            br.close();

            System.out.println("WRITING ============================");
            byte[] buff = "this is helloworld from java api!\n".getBytes();
            FSDataOutputStream os = fs.create(path);
            os.write(buff, 0, buff.length);
            os.close();

            fs.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
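For reference, the same read/write flow can also be written with try-with-resources so that the streams and the FileSystem are closed even when an exception is thrown, and with a loop that reads every line instead of only the first. The sketch below is only an illustrative variant, not part of the original project: it assumes the same NameNode at 127.0.0.1:9000 and the same /test/test.txt path as above, and the class name `AppTryWithResources` is made up for this example.

```java
package com.jiecxy;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;

// Illustrative variant of App.java; class name is hypothetical.
public class AppTryWithResources {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");

        // Assumption: same NameNode address and test file as in the article.
        String filePath = "hdfs://127.0.0.1:9000/test/test.txt";
        Path path = new Path(filePath);

        // try-with-resources closes the FileSystem and the streams automatically.
        try (FileSystem fs = FileSystem.get(new URI(filePath), conf)) {
            System.out.println("READING ============================");
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(fs.open(path), StandardCharsets.UTF_8))) {
                String line;
                while ((line = br.readLine()) != null) { // read every line, not just the first
                    System.out.println(line);
                }
            }

            System.out.println("WRITING ============================");
            byte[] buff = "this is helloworld from java api!\n".getBytes(StandardCharsets.UTF_8);
            try (FSDataOutputStream os = fs.create(path)) { // create() overwrites the existing file
                os.write(buff, 0, buff.length);
            }
        }
    }
}
```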

Run result: (screenshot of the console output omitted)

2. pom.xml Configuration

Note:
1. Use the shade plugin to avoid packaging failures.
2. For the dependencies, you can reference only `hadoop-client`, or reference both `hadoop-common` and `hadoop-hdfs`.
```xml
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.jiecxy</groupId>
  <artifactId>HDFSTest</artifactId>
  <version>1</version>
  <packaging>jar</packaging>

  <name>HDFSTest</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>2.8.1</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <executions>
          <!-- Run shade goal on package phase -->
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <filters>
                <filter>
                  <!-- Do not copy the signatures in the META-INF folder.
                       Otherwise, this might cause SecurityExceptions when using the JAR. -->
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>META-INF/*.SF</exclude>
                    <exclude>META-INF/*.DSA</exclude>
                    <exclude>META-INF/*.RSA</exclude>
                  </excludes>
                </filter>
              </filters>
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>com.jiecxy.App</mainClass>
                </transformer>
              </transformers>
              <createDependencyReducedPom>false</createDependencyReducedPom>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <configuration>
          <source>1.8</source>
          <target>1.8</target>
        </configuration>
      </plugin>
    </plugins>
  </build>
</project>
```
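With this pom.xml in place, `mvn clean package` builds a shaded (fat) jar; given the coordinates above it should land at roughly `target/HDFSTest-1.jar` (the exact file name depends on your artifactId and version). Because the ManifestResourceTransformer sets `com.jiecxy.App` as the Main-Class, the jar can be launched directly with `java -jar target/HDFSTest-1.jar`.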


3. Possible Problems

3.1 java.lang.IllegalArgumentException: Wrong FS

```
java.lang.IllegalArgumentException: Wrong FS: hdfs://127.0.0.1:9000/test/test.txt, expected: file:///
    at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:666)
    at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:86)
    at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:630)
    at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:861)
    at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:625)
    at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:435)
    at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:146)
    at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:347)
    at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:786)
    at com.jiecxy.App.main(App.java:25)
```

Solution: change

```java
FileSystem fs = FileSystem.get(conf);
```

to:

```java
FileSystem fs = FileSystem.get(new URI(filePath), conf);
```
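Another option, if you prefer to keep the single-argument `FileSystem.get(conf)`, is to point the default filesystem at HDFS via the `fs.defaultFS` property. The snippet below is only a sketch under the same assumption as the example (NameNode at 127.0.0.1:9000); the class name `DefaultFsExample` is hypothetical.

```java
package com.jiecxy;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical example class, for illustration only.
public class DefaultFsExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // Assumption: the NameNode from this article listens on 127.0.0.1:9000.
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000");

        // Because fs.defaultFS now points at HDFS, get(conf) returns a DistributedFileSystem
        // instead of the local file:/// filesystem, so no URI argument is needed.
        try (FileSystem fs = FileSystem.get(conf)) {
            System.out.println(fs.exists(new Path("/test/test.txt")));
        }
    }
}
```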


3.2 java.io.IOException: No FileSystem for scheme: hdfs

```
java.io.IOException: No FileSystem for scheme: hdfs
    at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2798)
    at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2809)
    at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:100)
    at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2848)
    at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2830)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:389)
    at com.jiecxy.App.main(App.java:24)
```

Solution:
Explicitly specify the FileSystem implementation for the hdfs scheme (if you only referenced `hadoop-common`, you also need to add the `hadoop-hdfs` dependency, otherwise this class cannot be found):

```java
conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
```
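If the property is set but the error still appears when running the shaded jar, a common cause is that merging the Hadoop jars clobbers the `META-INF/services/org.apache.hadoop.fs.FileSystem` registration; adding the shade plugin's `ServicesResourceTransformer` is the usual remedy for that case. To confirm the fix took effect, a minimal check like the sketch below can help (assumptions: the NameNode from the example is reachable at 127.0.0.1:9000; the class name `SchemeCheck` is made up for illustration).

```java
package com.jiecxy;

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

// Hypothetical helper class, for illustration only.
public class SchemeCheck {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // Assumption: NameNode at 127.0.0.1:9000, as elsewhere in this article.
        try (FileSystem fs = FileSystem.get(URI.create("hdfs://127.0.0.1:9000/"), conf)) {
            // Expected output: org.apache.hadoop.hdfs.DistributedFileSystem
            System.out.println(fs.getClass().getName());
        }
    }
}
```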