文章/答案/技术大牛

发布

问Hadoop - MapReduce
EN

Stack Overflow用户

提问于 2014-12-14 16:06:22

回答 2查看 443关注 0票数 0

我一直在尝试解决一个简单的 Map/Reduce 问题：统计若干输入文件中的单词，把单词的出现频率作为一个键，把单词长度作为另一个键。映射阶段每从文件中读到一个单词就输出一次计数 1，随后相同的单词会被归并到一起，得到各自的最终计数。最后，我希望在输出中看到按单词长度划分的统计结果，即每种长度下出现次数最多的单词是哪个。

这就是我们所得到的(我和我的团队)：这是WordCountMapper类

import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;


public class WordCountMapper extends MapReduceBase implements
                Mapper<LongWritable, Text, Text, CompositeGroupKey> {

        private final IntWritable one = new IntWritable(1);
        private Text word = new Text();

                 public void map(LongWritable key, Text value,
                 OutputCollector<Text, CompositeGroupKey> output, Reporter reporter)
                 throws IOException {

                 String line = value.toString();
                 StringTokenizer itr = new StringTokenizer(line.toLowerCase());
                 while(itr.hasMoreTokens()) {
                 word.set(itr.nextToken());
                 CompositeGroupKey gky = new CompositeGroupKey(1, word.getLength());
                 output.collect(word, gky);
                 }
                 }
}

这是 WordCountReducer 类：

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

import com.sun.xml.internal.bind.CycleRecoverable.Context;

/**
 * Reducer for the old {@code org.apache.hadoop.mapred} API that adds up the counts of all
 * values received for a word and emits a single {@link CompositeGroupKey} with the total count
 * and the word's length. Input and output types are identical.
 */
public class WordCountReducer extends MapReduceBase
    implements Reducer<Text, CompositeGroupKey, Text, CompositeGroupKey> {

        /**
         * Sums the counts for one word and emits {@code (word, (sum, length))}.
         *
         * @param key      the word
         * @param values   partial counts (with the word length) for that word
         * @param output   collector receiving the aggregated pair
         * @param reporter progress reporter; not used here
         * @throws IOException if reading a value or collecting the result fails
         */
        @Override
        public void reduce(Text key, Iterator<CompositeGroupKey> values,
                        OutputCollector<Text, CompositeGroupKey> output, Reporter reporter)
                        throws IOException {
                int sum = 0;
                int length = 0; // stays 0 if no value is supplied
                while (values.hasNext()) {
                        CompositeGroupKey value = values.next();
                        sum += value.getCount();
                        // Carry over the length stored in the value rather than re-deriving it
                        // from the key, whose Text.getLength() is a UTF-8 byte count.
                        length = value.getLength();
                }
                output.collect(key, new CompositeGroupKey(sum, length));
        }
}

这是 WordCount 类：

import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.mapred.jobcontrol.Job;

import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.StringUtils;

/**
 * Driver that configures the word-count job for the old {@code org.apache.hadoop.mapred} API
 * and submits it through {@code JobClient.runJob}.
 */
public class WordCount {

  /**
   * Builds the job configuration and runs the job.
   *
   * @param args optional paths: {@code args[0]} is the input directory (default
   *     {@code "input"}) and {@code args[1]} is the output directory (default
   *     {@code "output16"})
   */
  public static void main(String[] args) {
    String inputDir = args.length > 0 ? args[0] : "input";
    String outputDir = args.length > 1 ? args[1] : "output16";

    JobConf conf = new JobConf(WordCount.class);

    // specify output types
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(CompositeGroupKey.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(CompositeGroupKey.class);

    // specify input and output dirs
    FileInputFormat.addInputPath(conf, new Path(inputDir));
    FileOutputFormat.setOutputPath(conf, new Path(outputDir));

    // specify a mapper
    conf.setMapperClass(WordCountMapper.class);

    // specify a reducer; the same class also serves as the combiner
    conf.setReducerClass(WordCountReducer.class);
    conf.setCombinerClass(WordCountReducer.class);

    // runJob is static, so no JobClient instance has to be created or configured here.
    try {
      JobClient.runJob(conf);
    } catch (Exception e) {
      e.printStackTrace();
      // Signal the failure to the caller instead of exiting with status 0.
      System.exit(1);
    }
  }
}

这是 CompositeGroupKey 类：

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;

public class CompositeGroupKey implements WritableComparable<CompositeGroupKey> {
    int count;
    int length;

    public CompositeGroupKey(int c, int l) {
        this.count = c;
        this.length = l;
    }

    public void write(DataOutput out) throws IOException {
        WritableUtils.writeVInt(out, count);
        WritableUtils.writeVInt(out, length);
    }

    public void readFields(DataInput in) throws IOException {
        this.count = WritableUtils.readVInt(in);
        this.length = WritableUtils.readVInt(in);
    }

    public int compareTo(CompositeGroupKey pop) {
        return 0;
    }

    public int getCount() {
        return this.count;
    }

    public int getLength() {
        return this.length;
    }

}

现在我得到了这个错误：

at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:80) at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:62) at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40) at org.apache.hadoop.mapred.Task$ValuesIterator.readNextValue(Task.java:738) at org.apache.hadoop.mapred.Task$ValuesIterator.next(Task.java) at org.apache.hadoop.mapred.Task$CombineValuesIterator.next(Task.java:757) at WordCountReducer.reduce(WordCountReducer.java:24) at WordCountReducer.reduce(WordCountReducer.java:1) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.combineAndSpill(MapTask.java:904) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.sortAndSpill(MapTask.java:785) at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.flush(MapTask.java:698) at org.apache.hadoop.mapred.MapTask.run(MapTask.java) at org.apache.hadoop.mapred.TaskTracker$Child.main(TaskTracker.java:2209) Caused by: java.lang.NoSuchMethodException: CompositeGroupKey.<init>() at java.lang.Class.getConstructor0(Unknown Source) at java.lang.Class.getDeclaredConstructor(Unknown Source) at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:74)

我知道编码没有那么好，但是现在我们还不知道哪里出了问题，所以欢迎任何帮助！

java

hadoop

mapreduce

回答 2

Stack Overflow用户

发布于 2014-12-14 16:09:40

您必须在键类CompositeGroupKey中提供一个空的默认构造函数。它用于序列化。

只需添加：

// No-argument constructor: Hadoop's ReflectionUtils.newInstance looks this up reflectively
// when deserializing, and throws NoSuchMethodException if it is absent.
public CompositeGroupKey() {
}

票数 3

Stack Overflow用户

发布于 2015-12-03 09:50:52

每当您看到一些异常，如下面给出的异常

java.lang.RuntimeException: java.lang.NoSuchMethodException: CompositeGroupKey.<init>()

那么就是对象实例化出了问题，这意味着以下两种构造函数中可能有一个不存在：

默认构造函数或

参数化构造函数

一旦编写了带参数的构造函数，除非显式声明，否则 Java 将不再自动提供默认（无参）构造函数。

Ruslan Ostafiichuk 给出的答案已足以解答您的问题，我只是补充几点，以便把问题说得更清楚。

票数 1

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/27471146

复制

相似问题

问Hadoop - MapReduce
EN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问Hadoop - MapReduceEN

回答 2

Stack Overflow用户

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问Hadoop - MapReduce
EN