文章/答案/技术大牛

发布

社区首页 >问答首页 >使用TotalOrderPartitioner MapReduce时面临错误

问使用TotalOrderPartitioner MapReduce时面临错误
EN

Stack Overflow用户

提问于 2016-01-21 02:48:46

回答 1查看 478关注 0票数 1

我写了下面的程序。我在不使用TotalOrderPartitioner的情况下运行了它，并且运行得很好。所以我不认为Mapper或Reducer类本身有任何问题。

但是，当我包含TotalOrderPartitioner的代码--即编写分区文件，然后将其放入DistributedCache --时，我会得到以下错误:真正不知道如何处理它。

train@sandbox TOTALORDERPARTITIONER$ hadoop totalorderpart.jar average.AverageJob各县合计

16/01/18 04:14:00info input.FileInputFormat:进程的总输入路径:4 16/01/18 04:14:00 INFO partition.InputSampler:使用6样例16/01/18 04:14:00 INFO zlib.ZlibFactory:成功加载和初始化本机-zlib库16/01/18 04:14:00 INFO compress.CodecPool: Got全新的压缩器.deflate java.io.IOException:错误的键类: org.apache.hadoop.io.LongWritable不是类org.apache.hadoop。org.apache.hadoop.io.SequenceFile$RecordCompressWriter.append(SequenceFile.java:1380)在org.apache.hadoop.mapreduce.lib.partition.InputSampler.writePartitionFile(InputSampler.java:340) at average.AverageJob.run(AverageJob.java:132)在org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70) at average.AverageJob.main(AverageJob.java:146)在太阳。( reflect.NativeMethodAccessorImpl.invoke0(Native方法)在sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25) at java.lang.reflect.Method.invoke(Method.java:597) at org.apache.hadoop.util.RunJar.main(RunJar.java:212)

我的代码

package average;

import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;


public class AverageJob extends Configured implements Tool {

public enum Counters {MAP, COMINE, REDUCE};

public static class AverageMapper extends Mapper<LongWritable, Text, Text, Text> {

    private Text mapOutputKey = new Text();
    private Text mapOutputValue = new Text();
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {

        String[] words = StringUtils.split(value.toString(), '\\', ',');
        mapOutputKey.set(words[1].trim());

        StringBuilder moValue = new StringBuilder();
        moValue.append(words[9].trim()).append(",1");
        mapOutputValue.set(moValue.toString());
        context.write(mapOutputKey, mapOutputValue);

        context.getCounter(Counters.MAP).increment(1);
    }
}

public static class AverageCombiner extends Reducer<Text, Text, Text, Text> {

    private Text combinerOutputValue = new Text();

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {

        int count=0;
        long sum=0;
        for(Text value: values)
        {
            String[] strValues = StringUtils.split(value.toString(), ','); 
            sum+= Long.parseLong(strValues[0]);
            count+= Integer.parseInt(strValues[1]);
        }
        combinerOutputValue.set(sum + "," + count);
        context.write(key, combinerOutputValue);

        context.getCounter(Counters.COMINE).increment(1);
    }
}


public static class AverageReducer extends Reducer<Text, Text, Text, DoubleWritable> {


    private DoubleWritable reduceOutputKey = new DoubleWritable();

    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {

        int count=0;
        double sum=0;
        for(Text value: values)
        {
            String[] strValues = StringUtils.split(value.toString(), ',');
            sum+= Double.parseDouble(strValues[0]);
            count+= Integer.parseInt(strValues[1]);
        }

        reduceOutputKey.set(sum/count);
        context.write(key, reduceOutputKey);

        context.getCounter(Counters.REDUCE).increment(1);
    }

}


@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJarByClass(getClass());

    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setMapperClass(AverageMapper.class);
    job.setCombinerClass(AverageCombiner.class);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    job.setReducerClass(AverageReducer.class);

    job.setNumReduceTasks(6);

    InputSampler.Sampler<Text, Text> sampler = new InputSampler.RandomSampler<Text, Text>(0.2, 6, 5);
    InputSampler.writePartitionFile(job, sampler);

    String partitionFile = TotalOrderPartitioner.getPartitionFile(conf);
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    job.addCacheFile(partitionUri);

    return job.waitForCompletion(true)?0:1;
}

public static void main(String[] args) {

    int result=0;
    try
    {
        result = ToolRunner.run(new Configuration(), new AverageJob(), args);
        System.exit(result);
    }
    catch (Exception e)
    {
        e.printStackTrace();            
    }
}
}

hadoop-partitioning

hadoop

mapreduce

回答 1

Stack Overflow用户

发布于 2016-03-20 02:35:20

TotalOrderPartitioner不对Mapper的输出运行它的采样，而是在输入数据集上运行它的采样。您的输入格式以LongWritable作为键，文本作为值。相反，您正在尝试调用RandomSampler，声称您的格式具有作为键的文本和作为值的文本。这是InputSampler在运行时发现的不匹配，因此消息

错误的键类: org.apache.hadoop.io.LongWritable不是org.apache.hadoop.io.Text类

这意味着它试图找到文本作为键(基于您的参数化)，但是它找到了LongWritable。

票数 1

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/34914535

复制

相似问题

问使用TotalOrderPartitioner MapReduce时面临错误
EN

回答 1

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问使用TotalOrderPartitioner MapReduce时面临错误EN

回答 1

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问使用TotalOrderPartitioner MapReduce时面临错误
EN