Lab 21: MapReduce with Sequence File

Hi Hadoopers,

This post is a continuation of my previous post on Sequence Files. The sequence file produced in that post is used as the input of this MapReduce program.

This program will accept a sequence file as input and emit a text file as output.


package org.grassfield.nandu.etl;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Pass-through mapper for a sequence file whose keys and values are both
 * {@link Text}.
 *
 * <p>Each record is printed to standard output (useful while learning how the
 * records look) and then emitted unchanged.
 */
public class SeqFileReadMapper
        extends Mapper<Text, Text, Text, Text> {

    /**
     * Logs the incoming record and forwards it as-is.
     *
     * @param key     record key read from the sequence file
     * @param value   record value read from the sequence file
     * @param context task context used to emit the output pair
     * @throws IOException          if the record cannot be written
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(Text key, Text value, Context context)
            throws IOException, InterruptedException {
        // Diagnostic output only: shows the content and runtime type of each record.
        System.out.println("key:"+key+" "+key.getClass());
        System.out.println("value:"+value.toString()+" "+value.getClass());
        context.write(key, value);
    }
}


package org.grassfield.nandu.etl;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Pass-through reducer: writes every value it receives, paired with its key,
 * without aggregating anything.
 */
public class SeqFileReadReducer
        extends Reducer<Text, Text, Text, Text> {

    /**
     * Emits one output record per value grouped under {@code key}.
     *
     * @param key     the grouping key
     * @param values  all values that share {@code key}
     * @param context task context used to emit the output pairs
     * @throws IOException          if a record cannot be written
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        for (Text record : values) {
            context.write(key, record);
        }
    }
}


package org.grassfield.nandu.etl;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class SeqFileReadJob extends Configured implements Tool {

    public static void main(String[] args) throws Exception { Configuration(), new SeqFileReadJob(), args);

    public int run(String[] args) throws Exception {
        Job job = new Job(getConf());
        Configuration conf = job.getConfiguration();
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return 0;



$ hadoop jar FeedCategoryCount-21.jar org.grassfield.nandu.etl.SeqFileReadJob /user/hadoop/lab21/input/ /user/hadoop/lab21/19

$ hadoop fs -ls /user/hadoop/lab21/19
Found 2 items
-rw-r--r--   3 hadoop supergroup          0 2016-10-09 00:54 /user/hadoop/lab21/19/_SUCCESS
-rw-r--r--   3 hadoop supergroup        130 2016-10-09 00:54 /user/hadoop/lab21/19/part-r-00000
hadoop@gandhari:/opt/hadoop-2.6.4/jars$ hadoop fs -cat /user/hadoop/lab21/19/part-r-00000

$ hadoop fs -cat /user/hadoop/lab21/19/part-r-00000
0       101,Duryodhana,Dhritarashtra,Gandhari,Bhanumati
0       101,2000
18      101,4000
27      102,3000
48      102,Bheema,Pandu,Kunti,Hidimbi
9       102,1500

Leave a Reply

Fill in your details below or click an icon to log in: Logo

You are commenting using your account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s