数据挖掘笔记-基于项目协同过滤推荐的简单实现

来源:互联网 发布:编程不等于怎么表示 编辑:程序博客网 时间:2024/04/27 13:48

基本思想:预先根据所有用户的历史偏好数据计算项目之间的相似性,然后把与用户喜欢的项目相类似的项目推荐给用户。

基本过程:首先根据用户的偏好数据寻找出用户没有评分的项目,然后在用户没有评分的所有项目中,对每个项目预测一个可能的评分,这里会涉及到相似度计算和组合权重计算,最后对这些项目的评分从高到低进行排序,推荐前N个项目。


Java实现代码如下:

public class BasedItemRecommend {//最近最相邻项目数量public static final int NEAREST_ITEM = 100;//推荐结果数量public static final int RECOMMEND_NUM = 10;//电影映射private Map<Long, Movie> movieMap = DataLoader.getMovieMap();//用户电影映射private Map<Long, Map<Long, Integer>> userMovies = DataLoader.getUserMovieMap();//电影用户映射private Map<Long, Map<Long, Integer>> movieUsers = DataLoader.getMovieUserMap();//余弦距离计算public double cosine(Map<Long, Integer> p1, Map<Long, Integer> p2) {double x = 0, y = 0, z = 0;for (Map.Entry<Long, Integer> entry : p1.entrySet()) {Long k1 = entry.getKey();int v1 = entry.getValue();if (p2.containsKey(k1)) {x += v1 * p2.get(k1);}y += Math.pow(v1, 2);}for (Map.Entry<Long, Integer> entry : p2.entrySet()) {z += Math.pow(entry.getValue(), 2);}if (y == 0 || z ==0) return 0;return x / (Math.pow(y, 0.5) * Math.pow(z, 0.5));}//计算距离当前项目最近的N个项目public List<Map.Entry<Long, Double>> calNearestItem(Long movieId) {Map<Long, Integer> curUserRating = movieUsers.get(movieId);Map<Long, Double> movieRatingMap = new HashMap<Long, Double>();for (Map.Entry<Long, Map<Long, Integer>> entry : movieUsers.entrySet()) {Map<Long, Integer> userRating = entry.getValue();double distance = cosine(curUserRating, userRating);movieRatingMap.put(entry.getKey(), distance);}List<Map.Entry<Long, Double>> list = new ArrayList<Map.Entry<Long, Double>>(movieRatingMap.entrySet());Collections.sort(list, new Comparator<Map.Entry<Long, Double>>() {@Overridepublic int compare(Entry<Long, Double> o1, Entry<Long, Double> o2) {return o2.getValue().compareTo(o1.getValue());}});return list.subList(0, NEAREST_ITEM);}//预测未评分项目的评分public double predictRating(Long userId, Long unratingMovieId) {double similaritySum = 0.0, ratingSimilaritySum = 0.0;Map<Long, Integer> movieRatings = userMovies.get(userId);for (Map.Entry<Long, Integer> entry : movieRatings.entrySet()) {Long ratingMovieId = entry.getKey();int userRating = entry.getValue();System.out.println("urating: " + userRating);Map<Long, Integer> rating1 = movieUsers.get(ratingMovieId);Map<Long, Integer> rating2 = movieUsers.get(unratingMovieId);double similarity = 0;if (null != rating1 && null != rating2) {similarity = cosine(rating1, rating2);}similaritySum += similarity;ratingSimilaritySum += similarity * userRating;}if (similaritySum == 0) return 0;return ratingSimilaritySum / similaritySum;}//根据用户当前数据推荐项目public void recommend(Long userId) {Set<Long> userRatingMovieIds = userMovies.get(userId).keySet();Set<Long> movieIds = movieMap.keySet();Set<Long> userUnratingMovieIds = new HashSet<Long>();for (Long movieId : movieIds) {if (userRatingMovieIds.contains(movieId)) continue;userUnratingMovieIds.add(movieId);}Map<Long, Double> movieRatingMap = new HashMap<Long, Double>();for (Long userUnratingMovieId : userUnratingMovieIds) {double rating = predictRating(userId, userUnratingMovieId);System.out.println("rating: " + rating);movieRatingMap.put(userUnratingMovieId, rating);}List<Map.Entry<Long, Double>> movieRatingList = new ArrayList<Map.Entry<Long, Double>>(movieRatingMap.entrySet());Collections.sort(movieRatingList, new Comparator<Map.Entry<Long, Double>>() {@Overridepublic int compare(Map.Entry<Long, Double> o1, Map.Entry<Long, Double> o2) {return o2.getValue().compareTo(o1.getValue());}});for (int i = 0, len = movieRatingList.size(); i < len; i++) {Map.Entry<Long, Double> movieRating = movieRatingList.get(i);System.out.println("movie: " + movieRating.getKey() + " rating: " + movieRating.getValue());if (i == RECOMMEND_NUM) break;}}public static void main(String[] args) {new BasedItemRecommend().recommend(10L);}}


代码托管:https://github.com/fighting-one-piece/repository-datamining.git


0 0