1.开发环境:
Windows 2008 64bit
Java 1.6.0_30
MyEclipse 6.5
环境部署见:http://www.linuxidc.com/Linux/2014-02/96528.htm
2.hadoop集群环境:
Oracle Linux Enterprise 5.9
Java 1.6.0_18
Hadoop:hadoop 1.2.1三节点
namenode:10.1.32.91
datanode:10.1.32.93
datanode:10.1.32.95
环境部署见: http://www.linuxidc.com/Linux/2014-02/96529.htm
3.各java类功能介绍:
Recommend.java,主任务启动程序
Step1.java,按用户分组,计算所有物品出现的组合列表,得到用户对物品的评分矩阵
Step2.java,对物品组合列表进行计数,建立物品的同现矩阵
Step3.java,对同现矩阵和评分矩阵转型
Step4.java,合并矩阵,并计算推荐结果列表
HdfsDAO.java,HDFS操作工具类
4.执行代码和运行结果:
Recommend.java 代码:
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.hadoop.mapred.JobConf;
public class Recommend {
public static final String HDFS = "hdfs://10.1.32.91:9000";
public static final Pattern DELIMITER = Pattern.compile("[\t,]");
public static void main(String[] args) throws Exception {
Map<String, String> path = new HashMap<String, String>();
path.put("data", "C:\\Users\\licz\\workspace\\Mapreduce1\\small2.csv");
path.put("Step1Input", HDFS + "/user/hdfs/recommend");
path.put("Step1Output", path.get("Step1Input") + "/step1");
path.put("Step2Input", path.get("Step1Output"));
path.put("Step2Output", path.get("Step1Input") + "/step2");
path.put("Step3Input1", path.get("Step1Output"));
path.put("Step3Output1", path.get("Step1Input") + "/step3_1");
path.put("Step3Input2", path.get("Step2Output"));
path.put("Step3Output2", path.get("Step1Input") + "/step3_2");
path.put("Step4Input1", path.get("Step3Output1"));
path.put("Step4Input2", path.get("Step3Output2"));
path.put("Step4Output", path.get("Step1Input") + "/step4");
Step1.run(path);
Step2.run(path);
Step3.run1(path);
Step3.run2(path);
Step4.run(path);
System.exit(0);
}
public static JobConf config() {
JobConf conf = new JobConf(Recommend.class);
conf.setJobName("Recommand");
conf.addResource("classpath:/hadoop/core-site.xml");
conf.addResource("classpath:/hadoop/hdfs-site.xml");
conf.addResource("classpath:/hadoop/mapred-site.xml");
//conf.set("io.sort.mb", "1024");
return conf;
}
}