hadoop实现购物商城推荐系统
[日期:2014-07-04] |
来源:uml.org.cn
作者:潇洒子弦的博客 |
[字体:大 中 小]
|
package xian.zhang.common; import java.util.regex.Pattern; public class Util { public static final Pattern DELIMITER = Pattern.compile("[\t,]"); }
package xian.zhang.core; import java.io.IOException; import java.util.Iterator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; /** * 将输入数据 userid1,product1 userid1,product2 userid1,product3 * 合并成 userid1 product1,product2,product3输出 * @author zx * */ public class CombinProductInUser { public static class CombinProductMapper extends Mapper<LongWritable, Text, IntWritable, Text>{ @Override protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException { String[] items = value.toString().split(","); context.write(new IntWritable(Integer.parseInt(items[0])), new Text(items[1])); } } public static class CombinProductReducer extends Reducer<IntWritable, Text, IntWritable, Text>{ @Override protected void reduce(IntWritable key, Iterable<Text> values,Context context) throws IOException, InterruptedException { StringBuffer sb = new StringBuffer(); Iterator<Text> it = values.iterator(); sb.append(it.next().toString()); while(it.hasNext()){ sb.append(",").append(it.next().toString()); } context.write(key, new Text(sb.toString())); } } @SuppressWarnings("deprecation") public static boolean run(Path inPath,Path outPath) throws IOException, ClassNotFoundException, InterruptedException{ Configuration conf = new Configuration(); Job job = new Job(conf,"CombinProductInUser"); job.setJarByClass(CombinProductInUser.class); job.setMapperClass(CombinProductMapper.class); job.setReducerClass(CombinProductReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, inPath); 
FileOutputFormat.setOutputPath(job, outPath); return job.waitForCompletion(true); } }
|
package xian.zhang.core; import java.io.IOException; import java.util.Iterator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; /** * 将输入数据 userid1,product1 userid1,product2 userid1,product3 * 合并成 userid1 product1,product2,product3输出 * @author zx * */ public class CombinProductInUser { public static class CombinProductMapper extends Mapper<LongWritable, Text, IntWritable, Text>{ @Override protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException { String[] items = value.toString().split(","); context.write(new IntWritable(Integer.parseInt(items[0])), new Text(items[1])); } } public static class CombinProductReducer extends Reducer<IntWritable, Text, IntWritable, Text>{ @Override protected void reduce(IntWritable key, Iterable<Text> values,Context context) throws IOException, InterruptedException { StringBuffer sb = new StringBuffer(); Iterator<Text> it = values.iterator(); sb.append(it.next().toString()); while(it.hasNext()){ sb.append(",").append(it.next().toString()); } context.write(key, new Text(sb.toString())); } } @SuppressWarnings("deprecation") public static boolean run(Path inPath,Path outPath) throws IOException, ClassNotFoundException, InterruptedException{ Configuration conf = new Configuration(); Job job = new Job(conf,"CombinProductInUser"); job.setJarByClass(CombinProductInUser.class); job.setMapperClass(CombinProductMapper.class); job.setReducerClass(CombinProductReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, inPath); 
FileOutputFormat.setOutputPath(job, outPath); return job.waitForCompletion(true); } }
|
package xian.zhang.core;

import java.io.IOException;

import org.apache.hadoop.fs.Path;

/**
 * Command-line driver that chains the two jobs of the product recommender:
 * {@code CombinProductInUser} followed by {@code ProductCo_occurrenceMatrix}.
 */
public class Main {

    /**
     * Runs both jobs in sequence and terminates the JVM with status 0 when both succeed,
     * or 1 when either of them fails.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the final output path
     * @throws IllegalArgumentException if fewer than two arguments are given
     */
    public static void main(String[] args)
            throws ClassNotFoundException, IOException, InterruptedException {
        if (args.length < 2) {
            throw new IllegalArgumentException("要有两个参数,数据输入的路径和输出路径");
        }

        Path inPath1 = new Path(args[0]);
        // The intermediate result is written next to the input, under "CombinProduct".
        Path outPath1 = new Path(inPath1.getParent() + "/CombinProduct");
        Path inPath2 = outPath1;
        Path outPath2 = new Path(args[1]);

        // The second job only runs if the first one succeeded (short-circuit &&).
        boolean succeeded = CombinProductInUser.run(inPath1, outPath1)
                && ProductCo_occurrenceMatrix.run(inPath2, outPath2);

        // Report failure of either stage; previously a failed first stage fell
        // through and the process ended with status 0.
        System.exit(succeeded ? 0 : 1);
    }
}
|
ShopxxUserRecommend<相同购物喜好的好友推荐>
整个项目分两步:一,以商品对用户进行分组;二,求出用户的同现矩阵。
原理和ShopxxProductRecommend一样
下面附上代码
package xian.zhang.common; import java.util.regex.Pattern; public class Util { public static final Pattern DELIMITER = Pattern.compile("[\t,]"); }
|
package xian.zhang.core; import java.io.IOException; import java.util.Iterator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; /** * 将输入数据 userid1,product1 userid1,product2 userid1,product3 * 合并成 productid1 user1,user2,user3输出 * @author zx * */ public class CombinUserInProduct { public static class CombinUserMapper extends Mapper<LongWritable, Text, IntWritable, Text>{ @Override protected void map(LongWritable key, Text value,Context context) throws IOException, InterruptedException { String[] items = value.toString().split(","); context.write(new IntWritable(Integer.parseInt(items[1])), new Text(items[0])); } } public static class CombinUserReducer extends Reducer<IntWritable, Text, IntWritable, Text>{ @Override protected void reduce(IntWritable key, Iterable<Text> values,Context context) throws IOException, InterruptedException { StringBuffer sb = new StringBuffer(); Iterator<Text> it = values.iterator(); sb.append(it.next().toString()); while(it.hasNext()){ sb.append(",").append(it.next().toString()); } context.write(key, new Text(sb.toString())); } } @SuppressWarnings("deprecation") public static boolean run(Path inPath,Path outPath) throws IOException, ClassNotFoundException, InterruptedException{ Configuration conf = new Configuration(); Job job = new Job(conf,"CombinUserInProduct"); job.setJarByClass(CombinUserInProduct.class); job.setMapperClass(CombinUserMapper.class); job.setReducerClass(CombinUserReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, inPath); 
FileOutputFormat.setOutputPath(job, outPath); return job.waitForCompletion(true); } }
|
package xian.zhang.core; import java.io.IOException; import java.util.Iterator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import xian.zhang.common.Util; /** * 用户的同先矩阵 * @author zx * */ public class UserCo_occurrenceMatrix { public static class Co_occurrenceMapper extends Mapper<LongWritable, Text, Text, IntWritable>{ IntWritable one = new IntWritable(1); @Override protected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException { String[] products = Util.DELIMITER.split(value.toString()); for(int i=1;i<products.length;i++){ for(int j=1;j<products.length;j++){ if(i != j){ context.write(new Text(products[i] + ":" + products[j]), one); } } } } } public static class Co_occurrenceReducer extends Reducer<Text, IntWritable, NullWritable, Text>{ NullWritable nullKey =NullWritable.get(); @Override protected void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException { int sum = 0; Iterator<IntWritable> it = values.iterator(); while(it.hasNext()){ sum += it.next().get(); } context.write(nullKey, new Text(key.toString().replace(":", ",") + "," + sum)); } } @SuppressWarnings("deprecation") public static boolean run(Path inPath,Path outPath) throws IOException, ClassNotFoundException, InterruptedException{ Configuration conf = new Configuration(); Job job = new Job(conf,"UserCo_occurrenceMatrix"); job.setJarByClass(UserCo_occurrenceMatrix.class); job.setMapperClass(Co_occurrenceMapper.class); job.setReducerClass(Co_occurrenceReducer.class); 
job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputKeyClass(Text.class); FileInputFormat.addInputPath(job, inPath); FileOutputFormat.setOutputPath(job, outPath); return job.waitForCompletion(true); } }
|
package xian.zhang.core;

import java.io.IOException;

import org.apache.hadoop.fs.Path;

/**
 * Command-line driver that chains the two jobs of the user recommender:
 * {@code CombinUserInProduct} followed by {@code UserCo_occurrenceMatrix}.
 */
public class Main {

    /**
     * Runs both jobs in sequence and terminates the JVM with status 0 when both succeed,
     * or 1 when either of them fails.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the final output path
     * @throws IllegalArgumentException if fewer than two arguments are given
     */
    public static void main(String[] args)
            throws ClassNotFoundException, IOException, InterruptedException {
        if (args.length < 2) {
            throw new IllegalArgumentException("要有两个参数,数据输入的路径和输出路径");
        }

        Path inPath1 = new Path(args[0]);
        // The intermediate result is written next to the input, under "CombinUser".
        Path outPath1 = new Path(inPath1.getParent() + "/CombinUser");
        Path inPath2 = outPath1;
        Path outPath2 = new Path(args[1]);

        // The second job only runs if the first one succeeded (short-circuit &&).
        boolean succeeded = CombinUserInProduct.run(inPath1, outPath1)
                && UserCo_occurrenceMatrix.run(inPath2, outPath2);

        // Report failure of either stage; previously a failed first stage fell
        // through and the process ended with status 0.
        System.exit(succeeded ? 0 : 1);
    }
}
|
代码在github上有
git@github.com:chaoku/ShopxxProductRecommend.git
|
评论声明
- 尊重网上道德,遵守中华人民共和国的各项有关法律法规
- 承担一切因您的行为而直接或间接导致的民事或刑事法律责任
- 本站管理人员有权保留或删除其管辖留言中的任意内容
- 本站有权在网站内转载或引用您的评论
- 参与本评论即表明您已经阅读并接受上述条款
|
|
|