Note: the file name is important. It must exactly match the public class name, i.e. the file must be saved as WordCount.java.
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

  // Mapper: splits each input line into tokens and emits (word, 1) for every token.
  public static class TokenizerMapper
       extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());
        context.write(word, one);
      }
    }
  }

  // Reducer (also used as the combiner): sums all counts emitted for each word.
  public static class IntSumReducer
       extends Reducer<Text, IntWritable, Text, IntWritable> {

    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values,
                       Context context
                       ) throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);  // combiner pre-aggregates on the map side
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));    // args[0]: input dir on HDFS
    FileOutputFormat.setOutputPath(job, new Path(args[1]));  // args[1]: output dir (must not exist)
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

//code credit: https://hadoop.apache.org/docs/r2.10.0/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html
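Before compiling, it helps to see what the job actually computes: the mapper tokenizes each line and emits (word, 1) pairs, and the combiner/reducer sum the counts per word. Below is a minimal plain-Java sketch of that same tokenize-and-count logic, runnable without a cluster (the class name LocalWordCount and the hard-coded sample lines are illustrative, not part of the tutorial):

import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeMap;

// Plain-Java sketch of the WordCount logic: tokenize each line (the "map" step)
// and accumulate per-word counts (the "reduce" step). No Hadoop required.
public class LocalWordCount {
    public static void main(String[] args) {
        String[] lines = { "Hello World Bye World", "Hello Hadoop Goodbye Hadoop" };
        Map<String, Integer> counts = new TreeMap<>(); // sorted by key, like part-r-00000
        for (String line : lines) {
            StringTokenizer itr = new StringTokenizer(line);
            while (itr.hasMoreTokens()) {
                counts.merge(itr.nextToken(), 1, Integer::sum);
            }
        }
        counts.forEach((word, n) -> System.out.println(word + "\t" + n));
    }
}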
First, set the environment variables needed to compile and run against Hadoop:

export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
export PATH=${JAVA_HOME}/bin:${PATH}
export HADOOP_CLASSPATH=${JAVA_HOME}/lib/tools.jar
Compile WordCount.java and create a jar:
$ hadoop com.sun.tools.javac.Main WordCount.java
$ jar cf wc.jar WordCount*.class
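You can optionally confirm that the compiled classes were packaged into the jar:

$ jar tf wc.jar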
Create two input files for the MapReduce job. We will make two files in a local input folder as follows:
mkdir input
echo "Hello World Bye World" > input/file01
echo "Hello Hadoop Goodbye Hadoop" > input/file02
We also need to create the input folder on HDFS:
hadoop fs -mkdir -p /user/$USER/input
Now copy these files into HDFS:
hadoop fs -copyFromLocal input/ /user/$USER/
Verify that the files have been copied:

hadoop fs -ls /user/$USER/input
It should show the two files, for example:
Found 2 items
-rw-r--r--   1 zaid supergroup         22 2020-04-17 09:55 /user/zaid/input/file01
-rw-r--r--   1 zaid supergroup         28 2020-04-17 09:55 /user/zaid/input/file02
Now let's run the application:
hadoop jar wc.jar WordCount /user/$USER/input /user/$USER/output
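Note that the output directory must not already exist, or the job will fail. If you need to re-run the job, remove the old output first:

hadoop fs -rm -r /user/$USER/output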
The program should run, printing job progress and counters to the console, hopefully with no errors. Once it completes, you can check the output as follows:
$ hadoop fs -cat /user/$USER/output/part-r-00000
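With the two sample input files above, the output should be:

Bye	1
Goodbye	1
Hadoop	2
Hello	2
World	2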