
HBase and HDFS Data Exchange Programs


Let's look at the main classes provided by the HBase Java API, what each one is for, and how they relate to each other. After that, two MapReduce examples show how to write HDFS data into HBase and how to export HBase data back out to HDFS.

1. HBaseConfiguration

Class: org.apache.hadoop.hbase.HBaseConfiguration

Purpose: holds the configuration used by the HBase client.

Usage example: Configuration config = HBaseConfiguration.create();

Note: by default, HBaseConfiguration.create() reads the configuration in hbase-site.xml from the classpath and uses it to initialize the Configuration.
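Settings from hbase-site.xml can also be overridden in code, as the example programs later in this article do for the ZooKeeper quorum. A minimal sketch of that pattern (the host 192.168.1.139 and port 2191 are simply the values reused from those examples):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class ConfigExample {
    public static void main(String[] args) {
        // Load hbase-site.xml from the classpath
        Configuration conf = HBaseConfiguration.create();
        // Override individual settings programmatically
        conf.set("hbase.zookeeper.quorum", "192.168.1.139");
        conf.set("hbase.zookeeper.property.clientPort", "2191");
        System.out.println(conf.get("hbase.zookeeper.quorum"));
    }
}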

2. HBaseAdmin

Class: org.apache.hadoop.hbase.client.HBaseAdmin

Purpose: provides an administrative interface to the tables in an HBase database: checking for, creating, enabling, disabling and deleting tables.

Usage: HBaseAdmin admin = new HBaseAdmin(config);
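A rough sketch of how HBaseAdmin is used with the pre-1.0 client API that this article is based on (the table name "wordcount" is borrowed from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class AdminExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        try {
            // Check whether the table exists before dropping it
            if (admin.tableExists("wordcount")) {
                admin.disableTable("wordcount"); // a table must be disabled before deletion
                admin.deleteTable("wordcount");
            }
        } finally {
            admin.close(); // release the connection to the cluster
        }
    }
}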

3. HTableDescriptor

Class: org.apache.hadoop.hbase.HTableDescriptor

Purpose: the HTableDescriptor class describes a table: its name and its column families.

Usage: HTableDescriptor htd = new HTableDescriptor(tablename);

Constructs a table descriptor for the given table name.

htd.addFamily(new HColumnDescriptor("myFamily"));

Adds the given column family descriptor to the table descriptor.
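Putting HBaseAdmin and HTableDescriptor together, creating a table with one column family might look like the following sketch (same pre-1.0 API; the names "mytable" and "myFamily" are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateTableExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        try {
            HTableDescriptor htd = new HTableDescriptor("mytable");  // describe the table
            htd.addFamily(new HColumnDescriptor("myFamily"));        // add one column family
            admin.createTable(htd);                                  // create the table
        } finally {
            admin.close();
        }
    }
}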

4. HTable

Class: org.apache.hadoop.hbase.client.HTable

Purpose: HTable handles the client's communication with a single HBase table.

Usage: HTable tab = new HTable(config, Bytes.toBytes(tablename));

ResultScanner sc = tab.getScanner(Bytes.toBytes("familyName"));

Description: retrieves all the data of the column family familyName in the table.
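A self-contained sketch of such a family scan, including the cleanup that the fragment above omits (table and family names are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.util.Bytes;

public class ScanFamilyExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, Bytes.toBytes("mytable"));
        ResultScanner scanner = table.getScanner(Bytes.toBytes("myFamily"));
        try {
            for (Result row : scanner) {
                System.out.println(row); // each Result holds one row of the family
            }
        } finally {
            scanner.close();
            table.close();
        }
    }
}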

5. Put

Class: org.apache.hadoop.hbase.client.Put

Purpose: inserts (or updates) the data of a single row.

Usage: HTable table = new HTable(config, Bytes.toBytes(tablename));

Put put = new Put(row);

put.add(family, qualifier, value);

table.put(put);

Description: writes the cell specified by (family, qualifier, value) for the row row into the table tablename.
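A complete single-row write might look like this sketch (row key, family, qualifier and value are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class PutExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, Bytes.toBytes("mytable"));
        try {
            Put put = new Put(Bytes.toBytes("row1"));   // row key
            put.add(Bytes.toBytes("myFamily"),          // column family
                    Bytes.toBytes("qualifier"),         // column qualifier
                    Bytes.toBytes("value"));            // cell value
            table.put(put);                             // write the row to HBase
        } finally {
            table.close();
        }
    }
}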

6. Get

Class: org.apache.hadoop.hbase.client.Get

Purpose: reads the data of a single row.

Usage: HTable table = new HTable(config, Bytes.toBytes(tablename));

Get get = new Get(Bytes.toBytes(row));

Result result = table.get(get);

Description: retrieves the data of the row row from the table tablename.
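A complete single-row read, continuing with the same placeholder names:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class GetExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, Bytes.toBytes("mytable"));
        try {
            Get get = new Get(Bytes.toBytes("row1"));   // row key to fetch
            Result result = table.get(get);
            byte[] value = result.getValue(Bytes.toBytes("myFamily"), Bytes.toBytes("qualifier"));
            System.out.println(value == null ? "not found" : Bytes.toString(value));
        } finally {
            table.close();
        }
    }
}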

7. ResultScanner

Class: org.apache.hadoop.hbase.client.ResultScanner (an interface)

Purpose: an interface for iterating over the results of a scan.

Usage: ResultScanner scanner = table.getScanner(Bytes.toBytes(family));

for (Result rowResult : scanner) {

    byte[] str = rowResult.getValue(family, column);

}

Description: loops over the rows returned by the scan and reads the column value from each one.
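Besides getScanner(family), HTable also offers getScanner(Scan), which returns the same ResultScanner interface but lets the scan be narrowed to a single column. A sketch with placeholder names:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class ScannerExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, Bytes.toBytes("mytable"));
        Scan scan = new Scan();
        scan.addColumn(Bytes.toBytes("myFamily"), Bytes.toBytes("qualifier")); // restrict to one column
        ResultScanner scanner = table.getScanner(scan);
        try {
            for (Result rowResult : scanner) {
                byte[] value = rowResult.getValue(Bytes.toBytes("myFamily"), Bytes.toBytes("qualifier"));
                System.out.println(Bytes.toString(rowResult.getRow()) + " -> " + Bytes.toString(value));
            }
        } finally {
            scanner.close(); // always close the scanner to release server-side resources
            table.close();
        }
    }
}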

Example 1: read data from HDFS and write it into HBase

package org.hadoop.hbase;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCountHbaseWriter {

    public static class WordCountHbaseMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one); // emit <word, 1>
            }
        }
    }

    public static class WordCountHbaseReducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) { // iterate over the counts and sum them
                sum += val.get();
            }
            // one Put per word: column family "content", qualifier "count", value = sum
            Put put = new Put(key.getBytes());
            put.add(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(sum)));
            context.write(new ImmutableBytesWritable(key.getBytes()), put); // write the summed row
        }
    }

    public static void main(String[] args) {
        String tablename = "wordcount";
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.139");
        conf.set("hbase.zookeeper.property.clientPort", "2191");
        HBaseAdmin admin = null;
        try {
            admin = new HBaseAdmin(conf);
            if (admin.tableExists(tablename)) {
                System.out.println("table exists, recreating it...");
                admin.disableTable(tablename);
                admin.deleteTable(tablename);
            }
            HTableDescriptor htd = new HTableDescriptor(tablename);
            HColumnDescriptor tcd = new HColumnDescriptor("content");
            htd.addFamily(tcd);     // add the column family
            admin.createTable(htd); // create the table
            String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
            if (otherArgs.length != 1) {
                System.err.println("Usage: WordCountHbaseWriter <in>");
                System.exit(2);
            }
            Job job = new Job(conf, "WordCountHbaseWriter");
            job.setNumReduceTasks(2);
            job.setJarByClass(WordCountHbaseWriter.class);
            // use WordCountHbaseMapper for the Map phase
            job.setMapperClass(WordCountHbaseMapper.class);
            TableMapReduceUtil.initTableReducerJob(tablename, WordCountHbaseReducer.class, job);
            // set the input path of the job data
            FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
            // set the Map output key type to Text
            job.setOutputKeyClass(Text.class);
            // set the Map output value type to IntWritable
            job.setOutputValueClass(IntWritable.class);
            // run the job and exit when it completes
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (admin != null) {
                try {
                    admin.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}

Example 2: read data from HBase and write it to HDFS

package org.hadoop.hbase;

import java.io.IOException;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCountHbaseReader {

    public static class WordCountHbaseReaderMapper extends TableMapper<Text, Text> {
        @Override
        protected void map(ImmutableBytesWritable key, Result value, Context context)
                throws IOException, InterruptedException {
            StringBuffer sb = new StringBuffer("");
            for (Entry<byte[], byte[]> entry : value.getFamilyMap("content".getBytes()).entrySet()) {
                String str = new String(entry.getValue()); // convert the byte array to a String
                if (str != null) {
                    sb.append(new String(entry.getKey()));
                    sb.append(":");
                    sb.append(str);
                }
            }
            context.write(new Text(key.get()), new Text(new String(sb)));
        }
    }

    public static class WordCountHbaseReaderReduce extends Reducer<Text, Text, Text, Text> {
        private Text result = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            for (Text val : values) {
                result.set(val);
                context.write(key, result);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        String tablename = "wordcount";
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "192.168.1.139");
        conf.set("hbase.zookeeper.property.clientPort", "2191");
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 1) {
            System.err.println("Usage: WordCountHbaseReader <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "WordCountHbaseReader");
        job.setJarByClass(WordCountHbaseReader.class);
        // set the output path of the job data
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[0]));
        job.setReducerClass(WordCountHbaseReaderReduce.class);
        Scan scan = new Scan();
        TableMapReduceUtil.initTableMapperJob(tablename, scan, WordCountHbaseReaderMapper.class, Text.class, Text.class, job);
        // run the job and exit when it completes
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

To compile and run these programs, the relevant Hadoop JARs and all of the HBase JARs used by the code must be on the classpath.

If the classes above do not cover your needs, the full HBase API is documented at the following site:

http://www.yiibai.com/hbase/
