一、项目需求
现在有一份来自美国国家海洋和大气管理局的数据集,里面包含近30年每个气象站、每小时的天气预报数据,每个报告的文件大小大约15M。一共有10个气象站,每个报告文件的名字包含气象站ID,每条记录包含气温、风向、天气状况等多个字段信息。现在要求统计美国各气象站30年平均气温。
二、数据格式
一共10份气象站的数据
文档里面的数据格式,注意 -9999 表示数据缺失
三、项目开发
3.1 在 Windows 上进行开发
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-client</artifactId>
- <version>2.10.2</version>
- </dependency>
复制代码
- package com.feifei.mapreduce;
- import org.apache.hadoop.conf.Configuration;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.io.IntWritable;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapred.FileSplit;
- import org.apache.hadoop.mapreduce.Job;
- import org.apache.hadoop.mapreduce.Mapper;
- import org.apache.hadoop.mapreduce.Reducer;
- import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
- import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
- import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
- import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
- import java.io.IOException;
- public class WeatherAnalysis {
- public static class MyMapper extends Mapper<Object, Text, Text, IntWritable> {
- @Override
- protected void map(Object key, Text value, Mapper<Object, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
- String line = value.toString();
- int temperature = Integer.parseInt(line.substring(14, 19).trim());
- if(temperature != -9999){
- FileSplit fileSplit = (FileSplit) context.getInputSplit();
- String id = fileSplit.getPath().getName().substring(5, 10);
- context.write(new Text(id), new IntWritable(temperature));
- }
- }
- }
- public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
- private IntWritable mean = new IntWritable();
- @Override
- protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
- int sum = 0;
- int count = 0;
- for (IntWritable val : values) {
- sum += val.get();
- }
- mean.set(sum / count);
- context.write(key, mean);
- }
- }
- public static void main(String[] args) throws Exception {
- Configuration conf = new Configuration();
- Job job = Job.getInstance(conf);
- job.setJarByClass(WeatherAnalysis.class);
- job.setJobName("WeatherAnalysis");
- job.setInputFormatClass(TextInputFormat.class);
- job.setOutputFormatClass(TextOutputFormat.class);
- FileInputFormat.addInputPath(job, new Path(args[0]));
- FileOutputFormat.setOutputPath(job, new Path(args[1]));
- job.setMapperClass(WeatherAnalysis.MyMapper.class);
- job.setReducerClass(WeatherAnalysis.MyReducer.class);
- job.setMapOutputKeyClass(Text.class);
- job.setMapOutputValueClass(IntWritable.class);
- job.setOutputKeyClass(Text.class);
- job.setOutputValueClass(IntWritable.class);
- job.waitForCompletion(true);
- }
- }
复制代码
3.2 运行效果
3.3 对项目打包
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息请访问主页:qidao123.com:ToB企服之家,中国第一个企服评测及商务社交产业平台。 |