Lab 21: MapReduce with Sequence File

Hi Hadoopers,

This post is a continuation of my previous post on sequence files. The output produced in that post is read as the input of this MapReduce program.

This program will accept a sequence file as input and emit a text file as output.

Mapper:

package org.grassfield.nandu.etl;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class SeqFileReadMapper
        extends Mapper<Text, Text, Text, Text> {

    @Override
    protected void map(Text key, Text value,
            Mapper<Text, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        System.out.println("key:"+key+" "+key.getClass());
        System.out.println("value:"+value.toString()+" "+value.getClass());
        context.write(key, value);
    }
}

Reducer:

package org.grassfield.nandu.etl;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class SeqFileReadReducer
        extends Reducer<Text, Text, Text, Text> {

    @Override
    protected void reduce(Text key, Iterable<Text> values,
            Reducer<Text, Text, Text, Text>.Context context)
            throws IOException, InterruptedException {
        for(Text record:values){
            context.write(key, record);
        }
    }
}

Driver:

package org.grassfield.nandu.etl;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class SeqFileReadJob extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new Configuration(), new SeqFileReadJob(), args);
    }

    @Override
    public int run(String[] args) throws Exception {
        Job job = new Job(getConf());
        Configuration conf = job.getConfiguration();
        job.setJarByClass(this.getClass());
        job.setJobName("SeqFileReadJob");
        
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        
        job.setMapperClass(SeqFileReadMapper.class);
        job.setReducerClass(SeqFileReadReducer.class);
        
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        
        job.setNumReduceTasks(1);
        job.waitForCompletion(true);
        return 0;
    }

}

Execution

$ hadoop jar FeedCategoryCount-21.jar org.grassfield.nandu.etl.SeqFileReadJob /user/hadoop/lab21/input/ /user/hadoop/lab21/19

$ hadoop fs -ls /user/hadoop/lab21/19
Found 2 items
-rw-r--r--   3 hadoop supergroup          0 2016-10-09 00:54 /user/hadoop/lab21/19/_SUCCESS
-rw-r--r--   3 hadoop supergroup        130 2016-10-09 00:54 /user/hadoop/lab21/19/part-r-00000
hadoop@gandhari:/opt/hadoop-2.6.4/jars$ hadoop fs -cat /user/hadoop/lab21/19/part-r-00000

$ hadoop fs -cat /user/hadoop/lab21/19/part-r-00000
0       101,Duryodhana,Dhritarashtra,Gandhari,Bhanumati
0       101,2000
18      101,4000
27      102,3000
48      102,Bheema,Pandu,Kunti,Hidimbi
9       102,1500
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s