实现过程所需的Java代码 1、在hdfs目录/tmp/input/wordcount中有一系列文件,内容均以","号分隔。求按","号分隔后各个元素的出现频率,并将结果输出到hdfs目录/tmp/个人用户名中。
实现代码:
// HomeWork1: MapReduce job that counts the frequency of each comma-separated token
// found under /tmp/input/wordcount and writes the counts to the user's output directory.
// NOTE(review): this line appears truncated in the source as pasted — it ends at
// "extends Mapper" with no generic type parameters, no map() implementation, no
// Reducer, and no job driver. Recover the full class before attempting to compile.
package com.tledu.hadoop.mr.homework;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;import java.util.ArrayList;import java.util.List;import java.util.StringTokenizer;public class HomeWork1 { public static class HomeWork1Mapper extends Mapper
2、在hdfs目录/tmp/input/wordcount中有一系列文件,内容以","号分隔,分隔后的元素均为数值类型、字母或中文。求所有出现的数值的平均值。
实现代码:
// HomeWork3: MapReduce job that averages all numeric tokens among the comma-separated
// values under /tmp/input/wordcount (non-numeric tokens — letters/Chinese — are skipped,
// presumably filtered via RegUtils; verify against the full implementation).
// NOTE(review): org.apache.hadoop.io.DoubleWritable is imported twice on this line —
// the second import is redundant (or was meant to be a different Writable type).
// NOTE(review): this line appears truncated in the source as pasted — it ends at
// "extends Mapper" with no generic type parameters, no class body, and no job driver.
package com.tledu.hadoop.mr.homework;import com.tledu.hadoop.utils.RegUtils;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.DoubleWritable;import org.apache.hadoop.io.DoubleWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.FileSplit;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;import java.util.ArrayList;import java.util.List;import java.util.StringTokenizer;public class HomeWork3 { public static class HomeWork3Mapper extends Mapper
3、在hdfs目录/tmp/input/wordcount中有一系列文件,内容以","号分隔;同时在hdfs路径/tmp/black.txt处有一个黑名单文件,每行一个单词,用于存放不记入统计的单词列表。求按","号分隔的各个元素去除黑名单单词后的出现频率,并将结果输出到hdfs目录/tmp/output/个人用户名中。
实现代码:
// HomeWork5: MapReduce word-frequency job like HomeWork1, but first removes tokens
// listed in the HDFS blacklist file /tmp/black.txt (one word per line) before counting;
// results go to /tmp/output/<username>. Uses GenericOptionsParser for CLI arguments.
// NOTE(review): this line appears truncated in the source as pasted — it ends at
// "extends Mapper" with no generic type parameters, no map() body, no Reducer,
// no blacklist-loading code, and no job driver. Recover the full class before compiling.
package com.tledu.hadoop.mr.homework;import com.tledu.hadoop.utils.RegUtils;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileStatus;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.DoubleWritable;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.FileSplit;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.GenericOptionsParser;import java.io.IOException;import java.util.ArrayList;import java.util.List;import java.util.StringTokenizer;public class HomeWork5 { public static class HomeWork5Mapper extends Mapper