Collaborative Filtering协同过滤推荐算法

来源：互联网 | 发布：php视频教程百度网盘 | 编辑：程序博客网 | 时间：2024/05/22 05:25
from math import sqrt

# Sample ratings: critic name -> {movie title: rating}.
# Every function below reads this module-level dictionary.
dataset = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}


def _shared_items(person1, person2):
    """Return the items rated by both people, in person1's iteration order."""
    return [item for item in dataset[person1] if item in dataset[person2]]


def similarity_score(person1, person2):
    """Return a Euclidean-distance-based similarity of two people in ``dataset``.

    The score is ``1 / (1 + d)`` where ``d`` is the Euclidean distance between
    the two people's ratings over the items both of them rated. Identical
    ratings give 1; larger distances give values closer to 0.

    Returns 0 when the two people have no rated item in common.
    Raises KeyError if either name is not a key of ``dataset``.
    """
    shared = _shared_items(person1, person2)
    if not shared:
        return 0

    sum_of_squares = sum(
        pow(dataset[person1][item] - dataset[person2][item], 2)
        for item in shared
    )
    return 1 / (1 + sqrt(sum_of_squares))


def pearson_correlation(person1, person2):
    """Return the Pearson correlation of two people's ratings in ``dataset``.

    Only items rated by both people are considered.

    Returns 0 when there is no common item, or when the correlation is
    undefined because at least one person's common ratings have no spread.
    Raises KeyError if either name is not a key of ``dataset``.
    """
    shared = _shared_items(person1, person2)
    if not shared:
        return 0

    # float() keeps the divisions below true divisions on Python 2 even when
    # the ratings happen to be integers.
    number_of_ratings = float(len(shared))

    ratings1 = [dataset[person1][item] for item in shared]
    ratings2 = [dataset[person2][item] for item in shared]

    sum1 = sum(ratings1)
    sum2 = sum(ratings2)
    square_sum1 = sum(pow(rating, 2) for rating in ratings1)
    square_sum2 = sum(pow(rating, 2) for rating in ratings2)
    product_sum = sum(r1 * r2 for r1, r2 in zip(ratings1, ratings2))

    numerator = product_sum - (sum1 * sum2 / number_of_ratings)
    spread = ((square_sum1 - pow(sum1, 2) / number_of_ratings)
              * (square_sum2 - pow(sum2, 2) / number_of_ratings))

    # Mathematically spread >= 0, but float rounding can push it just below
    # zero, which would make sqrt() raise ValueError. Treat that like the
    # zero-denominator case: the correlation is undefined, so report 0.
    if spread <= 0:
        return 0
    return numerator / sqrt(spread)


def most_similar_users(person, number_of_users):
    """Return the people most similar to ``person`` by Pearson correlation.

    The result is a list of at most ``number_of_users`` tuples
    ``(score, name)``, highest score first. ``person`` is never included.
    """
    scores = sorted(
        ((pearson_correlation(person, other_person), other_person)
         for other_person in dataset if other_person != person),
        reverse=True,
    )
    return scores[0:number_of_users]


def user_reommendations(person):
    """Return items recommended for ``person``, best recommendation first.

    Each item that ``person`` has not rated (or has rated 0) is scored with
    the similarity-weighted average of the ratings given by every other
    person whose Pearson correlation with ``person`` is strictly positive.
    Only the item names are returned, ordered by descending score.
    """
    totals = {}    # item -> sum of (rating * similarity)
    sim_sums = {}  # item -> sum of the similarities that contributed

    for other in dataset:
        # Don't compare the person to themselves.
        if other == person:
            continue

        sim = pearson_correlation(person, other)
        # Ignore people who are uncorrelated or negatively correlated.
        if sim <= 0:
            continue

        for item, rating in dataset[other].items():
            # Only score items the person hasn't rated yet.
            if item not in dataset[person] or dataset[person][item] == 0:
                totals[item] = totals.get(item, 0) + rating * sim
                sim_sums[item] = sim_sums.get(item, 0) + sim

    # Normalise by the total similarity so items rated by many people are not
    # favoured merely for having more contributors. sim_sums[item] is always
    # > 0 here because only strictly positive similarities are accumulated.
    rankings = sorted(
        ((total / sim_sums[item], item) for item, total in totals.items()),
        reverse=True,
    )
    return [item for _score, item in rankings]


# Correctly spelled alias; the original (misspelled) name stays available.
user_recommendations = user_reommendations

# Script output: print the recommendations for the sample user.
# The parenthesised form works on both Python 2 and Python 3.
print(user_reommendations('Toby'))
探索推荐引擎内部的秘密，第 2 部分: 深入推荐引擎相关算法 - 协同过滤
探索推荐引擎内部的秘密，第 3 部分: 深入推荐引擎相关算法 - 聚类
Collaborative Filtering协同过滤推荐算法

memory-based 协同过滤（CF）方法

推荐方法总结