Preparation
Prepare some input files. You can upload them with hdfs dfs -put xxx/* /user/fatkun/input.
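For example, a quick way to generate a couple of test files and push them to HDFS (the local directory, file names, and file contents below are only illustrative assumptions; the HDFS path matches the one above):

mkdir wordcount-input
echo "Hello Hadoop hello world" > wordcount-input/file1.txt
echo "Hadoop MapReduce word count example" > wordcount-input/file2.txt
hdfs dfs -mkdir -p /user/fatkun/input
hdfs dfs -put wordcount-input/* /user/fatkun/input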
Code

package com.fatkun;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCount extends Configured implements Tool {
    static enum Counters {
        INPUT_WORDS // counter for the number of input words
    }

    static Log logger = LogFactory.getLog(WordCount.class);

    public static class CountMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {
        private final IntWritable one = new IntWritable(1);
        private Text word = new Text();
        private boolean caseSensitive = true;

        @Override
        protected void setup(Context context) throws IOException,
                InterruptedException {
            // read the job configuration
            Configuration conf = context.getConfiguration();
            caseSensitive = conf.getBoolean("wordcount.case.sensitive", true);
            super.setup(context);
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                if (caseSensitive) { // whether to keep the original case
                    word.set(itr.nextToken());
                } else {
                    word.set(itr.nextToken().toLowerCase());
                }
                context.write(word, one);
                context.getCounter(Counters.INPUT_WORDS).increment(1);
            }
        }
    }

    public static class CountReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        protected void reduce(Text text, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(text, new IntWritable(sum));
        }

    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration(getConf());
        Job job = Job.getInstance(conf, "example hadoop wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(CountMapper.class);
        job.setCombinerClass(CountReducer.class);
        job.setReducerClass(CountReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        List<String> otherArgs = new ArrayList<String>();
        // The rest of run() and the main() method were truncated in the
        // original post; the code below is a standard completion of the
        // Tool/ToolRunner pattern (collect remaining args, set input and
        // output paths, submit the job).
        for (int i = 0; i < args.length; ++i) {
            otherArgs.add(args[i]);
        }

        FileInputFormat.addInputPath(job, new Path(otherArgs.get(0)));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new WordCount(), args));
    }
}

Run
Export the jar from Eclipse, then run the following command:
hadoop jar wordcount.jar com.fatkun.WordCount -Dwordcount.case.sensitive=false /user/fatkun/input /user/fatkun/output
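Once the job finishes, the word counts are written to the reducer output files under the output directory. A quick way to inspect them (part-r-00000 is the default output file name for a single reducer; the paths match the command above):

hdfs dfs -ls /user/fatkun/output
hdfs dfs -cat /user/fatkun/output/part-r-00000

With wordcount.case.sensitive=false, words that differ only in case are folded into a single count. Note that the output directory must not already exist, otherwise FileOutputFormat will fail the job at submission time.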
References
http://cxwangyi.blogspot.com/2009/12/wordcount-tutorial-for-hadoop-0201.html
http://hadoop.apache.org/docs/r1.2.1/mapred_tutorial.html#Example%3A+WordCount+v2.0
Original post: "Hadoop WordCount new-API example"; thanks to the original author for sharing.
   
 
   