Learning Basic HBase and HDFS Programming
HBase (Hadoop Database) is a highly reliable, high-performance, column-oriented, scalable distributed storage system. With HBase you can build large-scale structured storage clusters on inexpensive commodity PC servers.

HBase is an open-source implementation of Google's Bigtable, and the analogy is close: where Bigtable uses GFS as its file storage system, HBase uses Hadoop HDFS; where Google runs MapReduce to process the massive data in Bigtable, HBase uses Hadoop MapReduce; and where Bigtable relies on Chubby as its coordination service, HBase uses ZooKeeper.

HDFS, the Hadoop Distributed File System, is a distributed file system designed to run on commodity hardware. It has much in common with existing distributed file systems, but the differences are also significant: HDFS is highly fault-tolerant and is designed to be deployed on low-cost machines; it provides high-throughput data access and is well suited to applications with large data sets; and it relaxes some POSIX constraints to enable streaming access to file system data. HDFS was originally built as infrastructure for the Apache Nutch search engine project and is part of the Apache Hadoop Core project.
Some basic operations on HBase and HDFS:

1. Start HDFS and HBase:
   $ start-dfs.sh
   $ start-hbase.sh

2. Stop HDFS and HBase (stop HBase before HDFS):
   $ stop-hbase.sh
   $ stop-dfs.sh

3. The HDFS directory is ~/work/hdfs.

4. To compile your Java code MyCode.java (implementing class MyCode):
   $ javac MyCode.java
   then run it:
   $ java MyCode <args>

5. Compile and run HDFSTest.java:
   $ javac HDFSTest.java
   $ java HDFSTest

6. Compile and run HBaseTest.java:
   $ javac HBaseTest.java
   $ java HBaseTest

To check that mytable was created and the new row was put successfully, start the HBase shell and run:

$ hbase shell
hbase(main):001:0> scan 'mytable'
ROW                COLUMN+CELL
 abc               column=mycf:a, timestamp=1428459927307, value=789
1 row(s) in 1.8950 seconds
hbase(main):002:0> disable 'mytable'
0 row(s) in 1.9050 seconds
hbase(main):003:0> drop 'mytable'
0 row(s) in 1.2320 seconds
hbase(main):004:0> exit
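The javac/java commands above assume that the Hadoop and HBase client jars are already on the classpath. If they are not, one option (an assumption about the local installation, not part of the original instructions) is to reuse the classpath printed by the hadoop and hbase launcher scripts:

$ javac -cp "$(hbase classpath)" HBaseTest.java
$ java -cp ".:$(hbase classpath)" HBaseTest
$ javac -cp "$(hadoop classpath)" HDFSTest.java
$ java -cp ".:$(hadoop classpath)" HDFSTest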
The HBaseTest.java code is as follows:

/*
 * Make sure that the classpath contains all the hbase libraries
 *
 * Compile:
 *   javac HBaseTest.java
 *
 * Run:
 *   java HBaseTest
 */
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.log4j.*;

public class HBaseTest {

    public static void main(String[] args)
            throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
        Logger.getRootLogger().setLevel(Level.WARN);

        // create table descriptor
        String tableName = "mytable";
        HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));

        // create column descriptor
        HColumnDescriptor cf = new HColumnDescriptor("mycf");
        htd.addFamily(cf);

        // configure HBase
        Configuration configuration = HBaseConfiguration.create();
        HBaseAdmin hAdmin = new HBaseAdmin(configuration);

        if (hAdmin.tableExists(tableName)) {
            System.out.println("Table already exists");
        } else {
            hAdmin.createTable(htd);
            System.out.println("table " + tableName + " created successfully");
        }
        hAdmin.close();

        // put "mytable","abc","mycf:a","789"
        HTable table = new HTable(configuration, tableName);
        Put put = new Put("abc".getBytes());
        put.add("mycf".getBytes(), "a".getBytes(), "789".getBytes());
        table.put(put);
        table.close();
        System.out.println("put successfully");
    }
}
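As a complement to the put, the row can also be verified in Java rather than in the shell. A minimal read-back sketch using the same old-style client API as HBaseTest could look like this; the class name GetTest is our own addition, not part of the original assignment:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class GetTest {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "mytable");
        // fetch row "abc" and read the cell mycf:a
        Get get = new Get(Bytes.toBytes("abc"));
        Result result = table.get(get);
        byte[] value = result.getValue(Bytes.toBytes("mycf"), Bytes.toBytes("a"));
        System.out.println("mycf:a = " + Bytes.toString(value)); // expect "789"
        table.close();
    }
}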
The HDFS code is as follows:

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Compile HDFSTest.java:
 *   javac HDFSTest.java
 *
 * Execute HDFSTest.java:
 *   java HDFSTest
 */
public class HDFSTest {

    public static void main(String[] args) throws IOException, URISyntaxException {
        String file = "hdfs://localhost:9000/hw1/README.txt";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(file), conf);
        Path path = new Path(file);
        FSDataInputStream in_stream = fs.open(path);
        BufferedReader in = new BufferedReader(new InputStreamReader(in_stream));
        String s;
        while ((s = in.readLine()) != null) {
            System.out.println(s);
        }
        in.close();
        fs.close();
    }
}
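HDFSTest only reads. For completeness, a minimal write sketch under the same assumptions (NameNode at hdfs://localhost:9000; the target path /hw1/hello.txt is our own illustration) might look like:

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HDFSWriteTest {
    public static void main(String[] args) throws IOException {
        String file = "hdfs://localhost:9000/hw1/hello.txt"; // hypothetical target path
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(file), conf);
        // create() overwrites an existing file by default
        FSDataOutputStream out = fs.create(new Path(file));
        out.writeBytes("hello hdfs\n");
        out.close();
        fs.close();
    }
}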
Assignment: learn the basic programming use of HBase and HDFS.

Specific requirements:

// Sort the records by the distinct-key columns so that identical records end up adjacent; then compare each record in the output with the previous one: skip it if they are equal, emit it if they differ. For example, the sorted projections (a,b), (a,b), (a,c) produce the distinct output (a,b), (a,c).
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.log4j.*;

/*
 * @author wanqeiqiang
 * @time 2017-3-28 18:45
 */

/** Compares two String[] records attribute by attribute. */
class ComparatorString implements Comparator<String[]> {
    /* @return a negative value, 0, or a positive value */
    public int compare(String[] a, String[] b) {
        int flag = 0;
        // compare each attribute in turn; stop at the first difference
        for (int i = 0; i < a.length; i++) {
            flag = a[i].compareTo(b[i]);
            if (flag != 0) break;
        }
        return flag;
    }
}

public class Hw1Grp5 {

    public static void main(String[] args) throws IOException, URISyntaxException {
        // obtain the arg R=<file>
        String file = "hdfs://localhost:9000" + args[0].substring(2);

        // obtain the arg select:Ri,<op>,<number>
        String[] select = args[1].substring(7).split(",");
        int select_Ri = Integer.parseInt(select[0].substring(1));
        String select_op = select[1];
        double select_number = Double.parseDouble(select[2]);

        // obtain the arg distinct:Ri,Rj,...
        String[] distinct = args[2].substring(9).split(",");
        int[] distinct_number = new int[distinct.length];
        for (int i = 0; i < distinct.length; i++) {
            distinct_number[i] = Integer.parseInt(distinct[i].substring(1));
        }

        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(file), conf);
        FSDataInputStream in_stream = fs.open(new Path(file));
        BufferedReader in = new BufferedReader(new InputStreamReader(in_stream));

        /* Read the HDFS file and project the qualifying records into lst.
         * The operator decides which rows qualify:
         *   gt: Ri >  number    ge: Ri >= number
         *   eq: Ri == number    ne: Ri != number
         *   le: Ri <= number    lt: Ri <  number */
        List<String[]> lst = new ArrayList<String[]>();
        String s;
        while ((s = in.readLine()) != null) {
            String[] line = s.split("\\|");
            double line_Ri = Double.parseDouble(line[select_Ri]);
            if (satisfies(select_op, line_Ri, select_number)) {
                String[] line_distinct = new String[distinct_number.length];
                for (int j = 0; j < distinct_number.length; j++) {
                    line_distinct[j] = line[distinct_number[j]];
                }
                lst.add(line_distinct);
            }
        }

        // sort lst so that duplicates become adjacent
        Collections.sort(lst, new ComparatorString());
        // remove adjacent duplicates
        List<String[]> list_distinct = listDistinct(lst);
        // put the result into HBase
        putHbase(list_distinct, distinct);
        in.close();
        fs.close();
    }

    /** @return true if value <op> threshold holds */
    private static boolean satisfies(String op, double value, double threshold) {
        if (op.equals("gt")) return value > threshold;
        if (op.equals("ge")) return value >= threshold;
        if (op.equals("eq")) return value == threshold;
        if (op.equals("ne")) return value != threshold;
        if (op.equals("le")) return value <= threshold;
        if (op.equals("lt")) return value < threshold;
        return false;
    }

    /** @return the list with adjacent duplicates removed */
    public static List<String[]> listDistinct(List<String[]> list) {
        List<String[]> list_distinct = new ArrayList<String[]>();
        list_distinct.add(list.get(0));
        for (int i = 1; i < list.size(); i++) {
            String[] s1 = list.get(i - 1);
            String[] s2 = list.get(i);
            // compare s1[j] with s2[j]; stop at the first difference
            int j;
            for (j = 0; j < s2.length; j++) {
                if (!s1[j].equals(s2[j])) break;
            }
            // if some j exists with s1[j] != s2[j], s2 is a new record
            if (j < s2.length) {
                list_distinct.add(s2);
            }
        }
        return list_distinct;
    }

    public static void putHbase(List<String[]> list, String[] distinct)
            throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
        Logger.getRootLogger().setLevel(Level.WARN);

        // create table descriptor
        String tableName = "Result";
        HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
        // create column descriptor
        HColumnDescriptor res = new HColumnDescriptor("res");
        htd.addFamily(res);

        // configure HBase; drop and recreate the table if it already exists
        Configuration configuration = HBaseConfiguration.create();
        HBaseAdmin hAdmin = new HBaseAdmin(configuration);
        if (hAdmin.tableExists(tableName)) {
            System.out.println("Table already exists");
            hAdmin.disableTable(tableName);
            hAdmin.deleteTable(tableName);
            hAdmin.createTable(htd);
        } else {
            hAdmin.createTable(htd);
            System.out.println("table " + tableName + " created successfully");
        }
        hAdmin.close();

        // row key is the record index; one cell per distinct column
        HTable table = new HTable(configuration, tableName);
        for (int i = 0; i < list.size(); i++) {
            String[] s = list.get(i);
            Put put = new Put(String.valueOf(i).getBytes());
            for (int j = 0; j < distinct.length; j++) {
                put.add("res".getBytes(), distinct[j].getBytes(), s[j].getBytes());
            }
            table.put(put);
        }
        table.close();
        System.out.println("put successfully");
    }
}