1.
Map Reduce program to count the number of occurrences of each word in a
given input text.
driver.java
package wordcount;
import java.io. *;
import java.util.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.fs.Path;
/**
 * Driver for the word-count MapReduce job (classic org.apache.hadoop.mapred API).
 *
 * Usage: hadoop jar wordcount.jar &lt;input path&gt; &lt;output path&gt;
 *
 * Wires the mapper and reducer classes into a JobConf, sets the output
 * key/value types emitted by both phases, and submits the job synchronously.
 */
public class driver
{
    public static void main(String[] args) throws IOException
    {
        // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException
        // when the input/output paths are missing.
        if (args.length < 2) {
            System.err.println("Usage: driver <input path> <output path>");
            System.exit(2);
        }
        JobConf conf = new JobConf(driver.class);
        // Name the job so it is identifiable in the JobTracker/YARN UI.
        conf.setJobName("wordcount");
        conf.setMapperClass(mapper.class);
        // The reducer is associative and commutative (integer sum), so it can
        // also run as a combiner to cut down shuffle traffic.
        conf.setCombinerClass(reducer.class);
        conf.setReducerClass(reducer.class);
        // Key/value types emitted by the map and reduce phases: (word, count).
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(conf, new Path(args[0]));
        // Output directory must not already exist, or the job will fail.
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        // Submit and block until the job completes.
        JobClient.runJob(conf);
    }
}
mapper.java
package wordcount;
import java.io.*;
import java.util.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.io.*;
/**
 * Map phase of word count: for every whitespace-separated token in an input
 * line, emits the pair (token, 1).
 *
 * Input:  (byte offset of the line, line text)
 * Output: (word, 1)
 */
public class mapper extends MapReduceBase implements Mapper<LongWritable, Text, Text,
IntWritable> {
    // Constant count of 1, shared across all emissions to avoid reallocation.
    private static final IntWritable ONE = new IntWritable(1);
    // Reused holder for the current token; Hadoop copies it on collect.
    private final Text currentWord = new Text();

    /**
     * Tokenizes one line of input and emits (word, 1) for each token.
     *
     * @param key      byte offset of the line within the input split (unused)
     * @param value    the line of text to tokenize
     * @param output   collector receiving the (word, 1) pairs
     * @param reporter progress reporter (unused)
     * @throws IOException if the collector fails
     */
    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
Reporter reporter)
            throws IOException {
        // StringTokenizer splits on the default delimiters (space, tab, newline, etc.).
        for (StringTokenizer tokens = new StringTokenizer(value.toString());
                tokens.hasMoreTokens(); ) {
            currentWord.set(tokens.nextToken());
            output.collect(currentWord, ONE);
        }
    }
}
reducer.java
package wordcount;
import java.io.*;
import java.util.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.io.*;
/**
 * Reduce phase of word count: sums all the partial counts received for a
 * single word and emits (word, total).
 *
 * Input:  (word, [1, 1, ...])
 * Output: (word, total occurrences)
 */
public class reducer extends MapReduceBase implements Reducer<Text, IntWritable, Text,
IntWritable> {
    /**
     * Accumulates the counts for one key and emits the total.
     *
     * @param key      the word being counted
     * @param values   iterator over the per-occurrence counts for this word
     * @param output   collector receiving the (word, total) pair
     * @param reporter progress reporter (unused)
     * @throws IOException if the collector fails
     */
    public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable>
output,
            Reporter reporter) throws IOException {
        int total = 0;
        // Drain the iterator, adding each partial count to the running total.
        for (Iterator<IntWritable> it = values; it.hasNext(); ) {
            total += it.next().get();
        }
        output.collect(key, new IntWritable(total));
    }
}
Steps to run
1. Create a New File named Bash.sh
2. Copy the Below code and Paste inside Bash.sh and save that File.
export JAVA_HOME=$(readlink -f $(which javac) | awk 'BEGIN {FS="/bin"} {print $1}')
export PATH=$(echo $PATH):$(pwd)/bin
export CLASSPATH=$(hadoop classpath)
3. Execute the Bash.sh file with the following command: source Bash.sh
4. Verify that the JAVA_HOME variable is set to the Java installation path and that the
PATH variable includes your Hadoop folder.
If any previous PATH entry points to an old Hadoop folder, remove it from the .bashrc file.
5. Verify that Hadoop is installed by executing the hadoop command. If the command prints
usage information about Hadoop, then Hadoop is successfully installed.
6. Create a folder named wordcount (matching the package name) and move into that folder.
7. Make the driver.java , mapper.java and reducer.java files.
8. Compile all java files (driver.java mapper.java reducer.java)
javac -d . *.java
9. Set driver class in manifest
echo Main-Class: wordcount.driver > Manifest.txt
10. Create an executable jar file
jar cfm wordcount.jar Manifest.txt wordcount/*.class
11. Create the input file input.txt:
echo "hello good morning, hello have a nice day" > input.txt
12. Run the jar file
hadoop jar wordcount.jar input.txt output
13. To see the Output
cat output/*