• mapreduce--单词分析


    WCDriver

    1. package com.atguigu.mr.wordcount;
    2. import org.apache.hadoop.conf.Configuration;
    3. import org.apache.hadoop.fs.Path;
    4. import org.apache.hadoop.io.LongWritable;
    5. import org.apache.hadoop.io.Text;
    6. import org.apache.hadoop.mapreduce.Job;
    7. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    8. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    9. import java.io.IOException;
    10. /*
    11. 程序的入口
    12. 1.创建job实例并允许
    13. */
    14. public class WCDriver {
    15. public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    16. // 创建job实例
    17. Configuration conf=new Configuration();
    18. Job job=Job.getInstance(conf);
    19. // 给job赋值
    20. // 关联本程序的jar 运行必须写
    21. job.setJarByClass(WCDriver.class);
    22. // 设置mapper reduce类
    23. job.setMapperClass(WCMapper.class);
    24. job.setReducerClass(WCReducer.class);
    25. // 设置mapper输出的key value的类型
    26. job.setMapOutputKeyClass(Text.class);
    27. job.setMapOutputValueClass(LongWritable.class);
    28. // 设置最终输出的key value类型
    29. job.setOutputKeyClass(Text.class);
    30. job.setOutputValueClass(LongWritable.class);
    31. /*
    32. 设置输出路径
    33. windows:
    34. FileInputFormat.setInputPaths(job,new Path("//"));
    35. FileInputFormat.setOutputPaths(job,new Path("//"));
    36. */
    37. FileInputFormat.setInputPaths(job,new Path(args[0]));
    38. FileOutputFormat.setOutputPath(job,new Path(args[1]));
    39. // FileInputFormat.setInputPaths(job,new Path("F:\\input"));
    40. // FileOutputFormat.setOutputPath(job,new Path("F:\\aa\\output"));
    41. // 运行job
    42. boolean b=job.waitForCompletion(true);
    43. System.out.println("b===="+ b);
    44. }
    45. }

     WCMapper

    1. package com.atguigu.mr.wordcount;
    2. import org.apache.commons.net.imap.IMAP;
    3. import org.apache.hadoop.io.LongWritable;
    4. import org.apache.hadoop.io.Text;
    5. import org.apache.hadoop.mapreduce.Mapper;
    6. import org.xbill.DNS.LOCRecord;
    7. import java.io.IOException;
    8. /*
    9. mapper阶段会运行MapTask -会调用Mappper类
    10. 在该类中实现业务逻辑
    11. */
    12. public class WCMapper extends Mapper {
    13. private Text outKey=new Text();
    14. private LongWritable outValue=new LongWritable();
    15. /**
    16. *
    17. * @param key 读取数据时的偏移量
    18. * @param value 读取的数据
    19. * @param context 上下文
    20. * @throws IOException
    21. * @throws InterruptedException
    22. */
    23. @Override
    24. protected void map(LongWritable key, Text value, Mapper.Context context) throws IOException, InterruptedException {
    25. // super.map(key, value, context);
    26. // 1.将数据进行切割
    27. // 1.1将Text转换成string---为了使用String API
    28. String line=value.toString();
    29. // 1.2对数据切割
    30. String[] words =line.split("");
    31. // 2.遍历数据
    32. for (String word:words){
    33. // 3.封装key,value
    34. // 创建key,value对象
    35. // 赋值
    36. outKey.set(word);
    37. outValue.set(1);
    38. // 4.将 key,value写进去
    39. context.write(outKey,outValue);
    40. }
    41. //
    42. //
    43. }
    44. }

     WCReducer

    1. package com.atguigu.mr.wordcount;
    2. import org.apache.hadoop.classification.InterfaceAudience;
    3. import org.apache.hadoop.io.LongWritable;
    4. import org.apache.hadoop.io.Text;
    5. import org.apache.hadoop.mapreduce.Reducer;
    6. import java.io.IOException;
    7. /*
    8. reduce阶段会运行reduceTask -会调用reducer类
    9. 在该类中实现业务逻辑
    10. */
    11. public class WCReducer extends Reducer {
    12. private LongWritable outValue=new LongWritable();
    13. /**
    14. *
    15. * @param key
    16. * @param values
    17. * @param context
    18. * @throws IOException
    19. * @throws InterruptedException
    20. */
    21. @Override
    22. protected void reduce(Text key, Iterable values, Reducer.Context context) throws IOException, InterruptedException {
    23. // super.reduce(key, values, context);
    24. // super.reduce(key, values, context);
    25. long sum =0; //value的和
    26. // 遍历所有的value
    27. for (LongWritable value : values){
    28. long v=value.get();
    29. // 累加
    30. sum+=v;
    31. }
    32. outValue.set(sum);
    33. context.write(key,outValue);
    34. }
    35. }

     

  • 相关阅读:
    一点C知识:数据类型和内存地址。
    文件路径操作
    lua基础之io
    C基础-操作符详解
    DC电源模块低温是否影响转换效率
    VUE 文件导出下载
    关于layui upload上传组件上传文件无反应的问题
    HTML制作个人网页制作(简单静态HTML个人博客网页作品)
    高压MOS管1000V/2A 可代替IXFP4N100 数据表(PDF)
    【计算机基础知识7】垃圾回收机制与内存泄漏
  • 原文地址:https://blog.csdn.net/weixin_74711824/article/details/134408954