package com.hadoop.learn.test;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.log4j.Logger;
/**
* 运行测试程序
*
* @author yongboy
* @date 2012-04-16
*/
public class WordCountTest {
private static final Logger log = Logger.getLogger(WordCountTest.class);
public static class TokenizerMapper extends
Mapper<Object, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context)
throws IOException, InterruptedException {
log.info("Map key : " + key);
log.info("Map value : " + value);
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
String wordStr = itr.nextToken();
word.set(wordStr);
log.info("Map word : " + wordStr);
context.write(word, one);
}
}
}
public static class IntSumReducer extends
Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
log.info("Reduce key : " + key);
log.info("Reduce value : " + values);
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
log.info("Reduce sum : " + sum);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args)
.getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: WordCountTest <in> <out>");
System.exit(2);
}
Job job = new Job(conf, "word count");
job.setJarByClass(WordCountTest.class);
job.setMapperClass(TokenizerMapper.class);
job.setCombinerClass(IntSumReducer.class);
job.setReducerClass(IntSumReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
相关推荐
Eclipse中远程调试Hadoop必备资料:hadoop-eclipse-plugin-1.1.1和hadoop-core-1.0.2-modified;已经在eclipse-jee-juno-SR1-win32-x86_64和hadoop1.1.1 下测试过。
Hadoop学习笔记,自己总结的一些Hadoop学习笔记,比较简单。
Hadoop 学习笔记.md
eclipse远程调试出现Exception in thread “main” java.lang.UnsatisfiedLinkError,eclipse4.5远程调试hadoop2.7.4依赖包 ,方法 将匹配的hadoop.dll、winutils.exe、libwinutils.lib拷贝到C:\Windows\System32 详情...
hadoop学习笔记,hadoop简介,适用于hadoop入门,讲解hadoop安装,使用,基本原理,大数据,分布式等概念
hadoop 学习笔记,从搭建环境开始到具体实验。包括hdfs配置,yarn配置,分布式配置,如何编写mapreuduce 一步一步手把手,最后项目是hadoop 与 javaweb
eclipse配置hadoop,并且如何在eclipse中进行mapreduce的开发
hadoop-2.8.0 版本的eclipse插件
Hadoop Eclipse是Hadoop开发环境的插件,用户在创建Hadoop程序时,Eclipse插件会自动导入Hadoop编程接口的jar文件,这样用户就可以在Eclipse插件的图形界面中进行编码、调试和运行Hadop程序,也能通过Eclipse插件...
云计算,hadoop,学习笔记, dd
我学习hadoop的笔记,并在公司做的报告,给大家共享下
Hadoop在eclipse上面安装插件,以及对Hadoop jar包和hadoop-common jar包的安装,环境变量的配置
1、使用hadoop-eclipse-plugin-2.9.2.jar该eclipse插件,可以访问远程的hdfs 2、使用hadoop-eclipse-plugin-2.9.2-local.jar,访问本地的文件系统 3、wintuils_hadoop.zip本地eclipse运行需要的包。 另外2.9.2hadoop...
Hadoop学习笔记
eclipse远程调试hadoop,报Could not locate executable null\bin\winutils.exe ,将winutils.exe放入到hadoop的bin目录下,报UnsatisfiedLinkError错,将 hadoop.dll放入C:\Windows\System32下
将此jar包拷贝到eclipse的plugins目录下,然后重启Eclipse,即可在eclipse中查看那到hadoop选择项. 将此jar包拷贝到eclipse的plugins目录下,然后重启Eclipse,即可在eclipse中查看那到hadoop选择项
Hadoop学习笔记AAAAAAAAAAA
Eclipse集成Hadoop2.10.0的插件,使用`ant`对hadoop的jar包进行打包并适应Eclipse加载,所以参数里有hadoop和eclipse的目录. 必须注意对于不同的hadoop版本,` HADDOP_INSTALL_PATH/share/hadoop/common/lib`下的jar包...
hadoop学习笔记.rarhadoop学习笔记.rarhadoop学习笔记.rarhadoop学习笔记.rarhadoop学习笔记.rarhadoop学习笔记.rarhadoop学习笔记.rar