KEMBAR78
Word count program execution steps in hadoop | PDF
WORD COUNT PROGRAM EXECUTION STEPS IN HADOOP
Step 1: Create a directory called wordcount in /home/user/Documents/
cd /home/user/Documents/
sudo mkdir wordcount
cd wordcount
Step 2: Create a WordCount.java file in the wordcount directory
vi WordCount.java
Sample content of the WordCount.java file
//package org.myorg;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;
/**
 * Classic Hadoop word-count job written against the legacy
 * {@code org.apache.hadoop.mapred} API. The mapper emits (token, 1) for every
 * whitespace-separated token of the input; the reducer sums the counts per token.
 */
public class WordCount {

    /** Mapper: tokenizes each input line and emits a count of 1 per token. */
    public static class Map extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {

        // Constant "1" is reused for every emission to avoid per-token allocation.
        private static final IntWritable ONE = new IntWritable(1);
        // Reusable key holder; Hadoop serializes it before the next set() call.
        private final Text currentWord = new Text();

        public void map(LongWritable key, Text value,
                        OutputCollector<Text, IntWritable> output,
                        Reporter reporter) throws IOException {
            // Default StringTokenizer delimiters: space, tab, newline, CR, FF.
            StringTokenizer tokens = new StringTokenizer(value.toString());
            while (tokens.hasMoreTokens()) {
                currentWord.set(tokens.nextToken());
                output.collect(currentWord, ONE);
            }
        }
    }

    /** Reducer: totals the per-word counts produced by the map phase. */
    public static class Reduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {

        public void reduce(Text key, Iterator<IntWritable> values,
                           OutputCollector<Text, IntWritable> output,
                           Reporter reporter) throws IOException {
            int total = 0;
            while (values.hasNext()) {
                total += values.next().get();
            }
            output.collect(key, new IntWritable(total));
        }
    }

    /**
     * Configures and submits the job, blocking until it completes.
     *
     * @param args args[0] = HDFS input path, args[1] = HDFS output directory
     *             (must not already exist)
     */
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(Map.class);
        // Combiner left disabled in this tutorial; Reduce would also work as one.
        //conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}
Step 3: Create a directory called wordcountc in /home/user/Documents/wordcount/
sudo mkdir wordcountc
Step 4: Create a directory on the Hadoop file system
hdfs dfs -mkdir /example1
Step 5: Copy the input file from the local system to Hadoop file system
hdfs dfs -copyFromLocal /home/user/Documents/emp.txt /example1/
Step 6: sudo javac -classpath /usr/local/hadoop/share/hadoop/common/hadoop-common-
2.6.0.jar:/usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-core-
2.6.0.jar:/usr/local/hadoop/share/hadoop/common/lib/hadoop-annotations-2.6.0.jar -d
wordcountc/ WordCount.java
Step 7: Now after compilation 3 class files will be generated in the directory 'wordcountc'
Step 8: Create jar file using the command
sudo jar -cvf wordcountj.jar -C /home/user/Documents/wordcount/wordcountc .
Step 9: Change to /usr/local/hadoop/ folder
cd /usr/local/hadoop/
Step 10: Execute using the below command
bin/hadoop jar /home/user/Documents/wordcount/wordcountj.jar WordCount
/example1/emp.txt output
Step 11: The output can be checked by typing http://localhost:50070 on the browser window. It
will display data node information. In the menu, under 'utilities', you can see an option for
'browse the file system'. Click that and find out the result of the execution under
'/user/hdpuser/output' directory.
Execution of wordcount program without creating a jar file
Execute the following command
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar
wordcount /example1/emp.txt output1
Prepared by
Jiju K Joseph, AP/CSE
Asan Memorial College of Engg. & Tech

Word count program execution steps in hadoop

  • 1.
    WORD COUNT PROGRAMEXECUTION STEPS IN HADOOP Step 1: Create a directory called wordcount in /home/user/Documents/ cd /home/user/Documents/ sudo mkdir wordcount cd wordcount Step 2: Create a WordCount.java file in the wordcount directory vi WordCount.java Sample content of the WordCount.java file //package org.myorg; import java.io.IOException; import java.util.*; import org.apache.hadoop.fs.Path; import org.apache.hadoop.conf.*; import org.apache.hadoop.io.*; import org.apache.hadoop.mapred.*; import org.apache.hadoop.util.*; public class WordCount { public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> { private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { String line = value.toString(); StringTokenizer tokenizer = new StringTokenizer(line); while (tokenizer.hasMoreTokens()) { word.set(tokenizer.nextToken()); output.collect(word, one); } } } public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> { public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException { int sum = 0; while (values.hasNext()) { sum += values.next().get(); } output.collect(key, new IntWritable(sum)); } } public static void main(String[] args) throws Exception { JobConf conf = new JobConf(WordCount.class);
  • 2.
    conf.setJobName("wordcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); //conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf,new Path(args[1])); JobClient.runJob(conf); } } Step 3: Create a directory called wordcountc in /home/user/Documents/wordcount/ sudo mkdir wordcountc Step 4: Create a directory on the Hadoop file system hdfs dfs -mkdir /example1 Step 5: Copy the input file from the local system to Hadoop file system hdfs dfs -copyFromLocal /home/user/Documents/emp.txt /example/ Step 6: sudo javac -classpath /usr/local/hadoop/share/hadoop/common/hadoop-common- 2.6.0.jar:/usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-core- 2.6.0.jar:/usr/local/hadoop/share/hadoop/common/lib/hadoop-annotations-2.6.0.jar -d wordcountc/ WordCount.java Step 7: Now after compilation 3 class files will be generated in the directory 'wordcount' Step 8: Create jar file using the command sudo jar -cvf wordcountj.jar -C /home/user/Documents/wordcount/wordcountc . Step 9: Change to /usr/local/hadoop/ folder cd /usr/local/hadoop/ folder Step 10: Execute using the below command bin/hadoop jar /home/user/Documents/wordcount/wordcountj.jar WordCount /example1/emp.txt output Step 11: The output can be checked by typing http://localhost:50070 on the browser window. It will display data node information. In the menu, under 'utilities', you can see an option for 'browse the file system'. Click that and find out the result of the execution under '/user/hdpuser/output' directory. Execution of wordcount program without creating a jar file Execute the following command bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar wordcount /example1/emp.txt output1 Prepared by Jiju K Joseph, AP/CSE
  • 3.
    Asan Memorial College of Engg. & Tech