数据挖掘之曼哈顿距离、欧几里得距离、明氏距离、皮尔逊相关系数、余弦相似度Python实现代码
来源:互联网 发布:m720 编程按钮 编辑:程序博客网 时间:2024/05/24 06:33
# -*- coding:utf8 -*-
from math import sqrt
# Sample data set: maps each user name to a dict of {artist: rating}.
users = {
    "Angelica": {
        "Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5,
        "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5,
        "Vampire Weekend": 2.0,
    },
    "Bill": {
        "Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
        "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0,
    },
    "Chan": {
        "Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
        "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0,
    },
    "Dan": {
        "Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
        "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 2.0,
    },
    "Hailey": {
        "Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
        "The Strokes": 4.0, "Vampire Weekend": 1.0,
    },
    "Jordyn": {
        "Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 4.0,
    },
    "Sam": {
        "Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0,
    },
    "Veronica": {
        "Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0,
        "Slightly Stoopid": 2.5, "The Strokes": 3.0,
    },
}
def manhattan(rating1, rating2):
    """Compute the Manhattan distance between two users' ratings.

    Both rating1 and rating2 are dictionaries of the form
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}. Only items that appear
    in both dictionaries contribute to the distance.

    Returns the sum of absolute rating differences over the shared items,
    or -1 when the two dictionaries have no item in common.
    """
    shared = [key for key in rating1 if key in rating2]
    if not shared:
        return -1  # Indicates no ratings in common
    return sum(abs(rating1[key] - rating2[key]) for key in shared)
# Euclidean distance
def euclidean(rating1, rating2):
    """Compute the Euclidean distance between two users' ratings.

    Both rating1 and rating2 are dictionaries of the form
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}. Only items that appear
    in both dictionaries contribute to the distance.

    Returns the square root of the summed squared rating differences over
    the shared items, or -1 when the two dictionaries have no item in
    common.
    """
    shared = [key for key in rating1 if key in rating2]
    if not shared:
        return -1  # Indicates no ratings in common
    squared = sum((rating1[key] - rating2[key]) ** 2 for key in shared)
    # The root is taken once over the whole sum; without it the result is
    # the squared distance, not the Euclidean distance.
    return sqrt(squared)
# Minkowski distance
def minkowski(rating1, rating2, r):
    """Compute the Minkowski distance of order r between two users' ratings.

    Both rating1 and rating2 are dictionaries of the form
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}. Only items that appear
    in both dictionaries contribute to the distance. r is the order of
    the distance (r=1 gives the Manhattan distance, r=2 the Euclidean
    distance) and must be non-zero.

    Returns (sum of |difference| ** r) ** (1 / r) over the shared items, or
    -1 when the two dictionaries have no item in common.
    """
    shared = [key for key in rating1 if key in rating2]
    if not shared:
        return -1  # Indicates no ratings in common
    total = sum(pow(abs(rating1[key] - rating2[key]), r) for key in shared)
    # 1.0 / r keeps the exponent fractional even when r is an int and `/`
    # truncates (Python 2), where 1 / 3 would be 0 and every result 1.
    return pow(total, 1.0 / r)
def computeNearestNeighbor(username, users, distance=None):
    """Create a list of (distance, user) tuples sorted closest first.

    username is the key in users whose neighbours are wanted; users maps
    each user name to that user's ratings dictionary. distance is an
    optional callable taking two ratings dictionaries and returning a
    number where smaller means closer and a negative value means "no
    ratings in common". When omitted, minkowski(..., 3) is used.

    Users for whom the distance function reports a negative value are
    left out, because that sentinel would otherwise sort ahead of every
    real distance and be picked as the nearest neighbour.
    """
    if distance is None:
        measure = lambda ratings, target: minkowski(ratings, target, 3)
    else:
        measure = distance

    target = users[username]
    distances = []
    for user in users:
        if user == username:
            continue
        d = measure(users[user], target)
        if d < 0:
            # No ratings in common: not comparable, so not a neighbour.
            continue
        distances.append((d, user))
    # sort based on distance -- closest first
    distances.sort()
    return distances
def recommend(username, users):
    """Give a list of recommendations for username.

    Finds the nearest neighbour via computeNearestNeighbor, prints that
    neighbour's name, and returns the (artist, rating) pairs the neighbour
    rated but username did not, sorted by rating from highest to lowest.
    Returns an empty list when computeNearestNeighbor yields no
    neighbours.
    """
    # first find nearest neighbor
    neighbors = computeNearestNeighbor(username, users)
    if not neighbors:
        return []
    nearest = neighbors[0][1]
    # Kept from the original: shows which neighbour was chosen. The
    # parenthesised form works as a statement in Python 2 and a call in 3.
    print(nearest)

    # now find bands neighbor rated that user didn't
    neighborRatings = users[nearest]
    userRatings = users[username]
    recommendations = [(artist, neighborRatings[artist])
                       for artist in neighborRatings
                       if artist not in userRatings]
    return sorted(recommendations,
                  key=lambda artistTuple: artistTuple[1],
                  reverse=True)
# examples - uncomment to run
#print( recommend('Hailey', users))
def pearson(rating1, rating2):
    """Compute the Pearson correlation coefficient of two users' ratings.

    Both rating1 and rating2 are dictionaries mapping item -> numeric
    rating. Only items that appear in both dictionaries are used, and the
    coefficient is computed with the single-pass formula based on running
    sums.

    Returns the correlation, or 0 when it is undefined: no shared items,
    or one of the two users gave every shared item the same rating.
    """
    sum_xy = 0
    sum_x = 0
    sum_y = 0
    sum_x2 = 0
    sum_y2 = 0
    n = 0
    for key in rating1:
        if key in rating2:
            n += 1
            x = rating1[key]
            y = rating2[key]
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += x ** 2
            sum_y2 += y ** 2
    if n == 0:
        # No shared items: avoid dividing by zero below.
        return 0
    # Float count so the divisions below never truncate with int ratings.
    count = float(n)
    spread_x = sum_x2 - (sum_x ** 2) / count
    spread_y = sum_y2 - (sum_y ** 2) / count
    # Rounding can leave a spread slightly below zero, which sqrt rejects;
    # treat that the same as an exactly zero denominator.
    if spread_x <= 0 or spread_y <= 0:
        return 0
    denominator = sqrt(spread_x) * sqrt(spread_y)
    return (sum_xy - (sum_x * sum_y) / count) / denominator
def cos_like(rating1, rating2):
    """Compute the cosine similarity of two users' ratings.

    Both rating1 and rating2 are dictionaries mapping item -> numeric
    rating. Only items that appear in both dictionaries are used, for the
    dot product as well as for the two vector lengths.

    Returns the dot product divided by the product of the vector lengths,
    or 0 when that product is zero (no shared items, or all shared ratings
    of one user are zero).
    """
    inner_prod = 0
    norm_x2 = 0
    norm_y2 = 0
    for key in rating1:
        if key in rating2:
            x = rating1[key]
            y = rating2[key]
            inner_prod += x * y
            norm_x2 += x ** 2
            norm_y2 += y ** 2
    # Compute the length product once and reuse it for test and division.
    length_product = sqrt(norm_x2) * sqrt(norm_y2)
    if length_product == 0:
        return 0
    return inner_prod / length_product
# Demo: print two similarity scores for Angelica and Bill, then each
# recommendation for Veronica. The parenthesised print form works as a
# statement in Python 2 and as a function call in Python 3.
print(cos_like(users['Angelica'], users['Bill']))
print(pearson(users['Angelica'], users['Bill']))
for recommendation in recommend('Veronica', users):
    print(recommendation)
from math import sqrt
# Sample data set: maps each user name to a dict of {artist: rating}.
users = {
    "Angelica": {
        "Blues Traveler": 3.5, "Broken Bells": 2.0, "Norah Jones": 4.5,
        "Phoenix": 5.0, "Slightly Stoopid": 1.5, "The Strokes": 2.5,
        "Vampire Weekend": 2.0,
    },
    "Bill": {
        "Blues Traveler": 2.0, "Broken Bells": 3.5, "Deadmau5": 4.0,
        "Phoenix": 2.0, "Slightly Stoopid": 3.5, "Vampire Weekend": 3.0,
    },
    "Chan": {
        "Blues Traveler": 5.0, "Broken Bells": 1.0, "Deadmau5": 1.0,
        "Norah Jones": 3.0, "Phoenix": 5, "Slightly Stoopid": 1.0,
    },
    "Dan": {
        "Blues Traveler": 3.0, "Broken Bells": 4.0, "Deadmau5": 4.5,
        "Phoenix": 3.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 2.0,
    },
    "Hailey": {
        "Broken Bells": 4.0, "Deadmau5": 1.0, "Norah Jones": 4.0,
        "The Strokes": 4.0, "Vampire Weekend": 1.0,
    },
    "Jordyn": {
        "Broken Bells": 4.5, "Deadmau5": 4.0, "Norah Jones": 5.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.5, "The Strokes": 4.0,
        "Vampire Weekend": 4.0,
    },
    "Sam": {
        "Blues Traveler": 5.0, "Broken Bells": 2.0, "Norah Jones": 3.0,
        "Phoenix": 5.0, "Slightly Stoopid": 4.0, "The Strokes": 5.0,
    },
    "Veronica": {
        "Blues Traveler": 3.0, "Norah Jones": 5.0, "Phoenix": 4.0,
        "Slightly Stoopid": 2.5, "The Strokes": 3.0,
    },
}
def manhattan(rating1, rating2):
    """Compute the Manhattan distance between two users' ratings.

    Both rating1 and rating2 are dictionaries of the form
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}. Only items that appear
    in both dictionaries contribute to the distance.

    Returns the sum of absolute rating differences over the shared items,
    or -1 when the two dictionaries have no item in common.
    """
    shared = [key for key in rating1 if key in rating2]
    if not shared:
        return -1  # Indicates no ratings in common
    return sum(abs(rating1[key] - rating2[key]) for key in shared)
# Euclidean distance
def euclidean(rating1, rating2):
    """Compute the Euclidean distance between two users' ratings.

    Both rating1 and rating2 are dictionaries of the form
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}. Only items that appear
    in both dictionaries contribute to the distance.

    Returns the square root of the summed squared rating differences over
    the shared items, or -1 when the two dictionaries have no item in
    common.
    """
    shared = [key for key in rating1 if key in rating2]
    if not shared:
        return -1  # Indicates no ratings in common
    squared = sum((rating1[key] - rating2[key]) ** 2 for key in shared)
    # The root is taken once over the whole sum; without it the result is
    # the squared distance, not the Euclidean distance.
    return sqrt(squared)
# Minkowski distance
def minkowski(rating1, rating2, r):
    """Compute the Minkowski distance of order r between two users' ratings.

    Both rating1 and rating2 are dictionaries of the form
    {'The Strokes': 3.0, 'Slightly Stoopid': 2.5}. Only items that appear
    in both dictionaries contribute to the distance. r is the order of
    the distance (r=1 gives the Manhattan distance, r=2 the Euclidean
    distance) and must be non-zero.

    Returns (sum of |difference| ** r) ** (1 / r) over the shared items, or
    -1 when the two dictionaries have no item in common.
    """
    shared = [key for key in rating1 if key in rating2]
    if not shared:
        return -1  # Indicates no ratings in common
    total = sum(pow(abs(rating1[key] - rating2[key]), r) for key in shared)
    # 1.0 / r keeps the exponent fractional even when r is an int and `/`
    # truncates (Python 2), where 1 / 3 would be 0 and every result 1.
    return pow(total, 1.0 / r)
def computeNearestNeighbor(username, users, distance=None):
    """Create a list of (distance, user) tuples sorted closest first.

    username is the key in users whose neighbours are wanted; users maps
    each user name to that user's ratings dictionary. distance is an
    optional callable taking two ratings dictionaries and returning a
    number where smaller means closer and a negative value means "no
    ratings in common". When omitted, minkowski(..., 3) is used.

    Users for whom the distance function reports a negative value are
    left out, because that sentinel would otherwise sort ahead of every
    real distance and be picked as the nearest neighbour.
    """
    if distance is None:
        measure = lambda ratings, target: minkowski(ratings, target, 3)
    else:
        measure = distance

    target = users[username]
    distances = []
    for user in users:
        if user == username:
            continue
        d = measure(users[user], target)
        if d < 0:
            # No ratings in common: not comparable, so not a neighbour.
            continue
        distances.append((d, user))
    # sort based on distance -- closest first
    distances.sort()
    return distances
def recommend(username, users):
    """Give a list of recommendations for username.

    Finds the nearest neighbour via computeNearestNeighbor, prints that
    neighbour's name, and returns the (artist, rating) pairs the neighbour
    rated but username did not, sorted by rating from highest to lowest.
    Returns an empty list when computeNearestNeighbor yields no
    neighbours.
    """
    # first find nearest neighbor
    neighbors = computeNearestNeighbor(username, users)
    if not neighbors:
        return []
    nearest = neighbors[0][1]
    # Kept from the original: shows which neighbour was chosen. The
    # parenthesised form works as a statement in Python 2 and a call in 3.
    print(nearest)

    # now find bands neighbor rated that user didn't
    neighborRatings = users[nearest]
    userRatings = users[username]
    recommendations = [(artist, neighborRatings[artist])
                       for artist in neighborRatings
                       if artist not in userRatings]
    return sorted(recommendations,
                  key=lambda artistTuple: artistTuple[1],
                  reverse=True)
# examples - uncomment to run
#print( recommend('Hailey', users))
def pearson(rating1, rating2):
    """Compute the Pearson correlation coefficient of two users' ratings.

    Both rating1 and rating2 are dictionaries mapping item -> numeric
    rating. Only items that appear in both dictionaries are used, and the
    coefficient is computed with the single-pass formula based on running
    sums.

    Returns the correlation, or 0 when it is undefined: no shared items,
    or one of the two users gave every shared item the same rating.
    """
    sum_xy = 0
    sum_x = 0
    sum_y = 0
    sum_x2 = 0
    sum_y2 = 0
    n = 0
    for key in rating1:
        if key in rating2:
            n += 1
            x = rating1[key]
            y = rating2[key]
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += x ** 2
            sum_y2 += y ** 2
    if n == 0:
        # No shared items: avoid dividing by zero below.
        return 0
    # Float count so the divisions below never truncate with int ratings.
    count = float(n)
    spread_x = sum_x2 - (sum_x ** 2) / count
    spread_y = sum_y2 - (sum_y ** 2) / count
    # Rounding can leave a spread slightly below zero, which sqrt rejects;
    # treat that the same as an exactly zero denominator.
    if spread_x <= 0 or spread_y <= 0:
        return 0
    denominator = sqrt(spread_x) * sqrt(spread_y)
    return (sum_xy - (sum_x * sum_y) / count) / denominator
def cos_like(rating1, rating2):
    """Compute the cosine similarity of two users' ratings.

    Both rating1 and rating2 are dictionaries mapping item -> numeric
    rating. Only items that appear in both dictionaries are used, for the
    dot product as well as for the two vector lengths.

    Returns the dot product divided by the product of the vector lengths,
    or 0 when that product is zero (no shared items, or all shared ratings
    of one user are zero).
    """
    inner_prod = 0
    norm_x2 = 0
    norm_y2 = 0
    for key in rating1:
        if key in rating2:
            x = rating1[key]
            y = rating2[key]
            inner_prod += x * y
            norm_x2 += x ** 2
            norm_y2 += y ** 2
    # Compute the length product once and reuse it for test and division.
    length_product = sqrt(norm_x2) * sqrt(norm_y2)
    if length_product == 0:
        return 0
    return inner_prod / length_product
# Demo: print two similarity scores for Angelica and Bill, then each
# recommendation for Veronica. The parenthesised print form works as a
# statement in Python 2 and as a function call in Python 3.
print(cos_like(users['Angelica'], users['Bill']))
print(pearson(users['Angelica'], users['Bill']))
for recommendation in recommend('Veronica', users):
    print(recommendation)
0 0
- 数据挖掘之曼哈顿距离、欧几里距离、明氏距离、皮尔逊相关系数、余弦相似度Python实现代码
- Atitti knn实现的具体四个距离算法 欧氏距离、余弦距离、汉明距离、曼哈顿距离
- 曼哈顿距离,欧式距离,余弦距离
- [数据挖掘]数学基础---距离度量方式(马氏距离,欧式距离,曼哈顿距离)
- 欧氏距离和余弦相似度
- 欧氏距离和余弦相似度
- 欧氏距离和余弦相似度
- 欧氏距离与余弦相似度
- 欧氏距离和余弦相似度
- 文本相似度计算的几个距离公式(欧氏距离、余弦相似度、Jaccard距离、编辑距离)
- 【Python】欧氏距离和余弦距离
- 【Python】用zip函数求欧氏距离、余弦相似度
- 曼哈顿距离、欧氏距离
- 欧式距离与余弦相似度
- 余弦相似度 与 欧式距离 选择
- 曼哈顿距离,欧式距离,明式距离,切比雪夫距离以及马氏距离
- [推荐系统]欧氏距离和余弦相似度
- numpy欧氏距离和余弦相似度
- hdu 1829 A Bug's Life(种类并查集)
- poj 3259 最短路判负环 spfa算法和Bellman_ford算法
- mac下MySQL修改root初始密码
- 理解C语言——从小菜到大神的晋级之路(13)——进一步讨论函数与指针
- Object-C高级编程读书笔记(2)——Block的实质
- 数据挖掘之曼哈顿距离、欧几里距离、明氏距离、皮尔逊相关系数、余弦相似度Python实现代码
- 安卓学习_contentprovider()_查询手机联系人查找
- 2016年的第 10 周
- 按钮的type会对ajax的异步请求处理有影响
- 集群管理和分布式任务 Apache helix
- 完美的代价
- 求数列的和
- swfit中一个字符串显示两行不同的样式
- 四通道和三通道的处理