相似性度量方法

来源：互联网 | 发布：sql join 多表 | 编辑：程序博客网 | 时间：2024/05/11 23:48

欧几里得相似度

# Returns a distance-based similarity score for person1 and person2
def sim_distance(prefs, person1, person2):
    """Return a Euclidean-distance-based similarity score for two people.

    Args:
        prefs: Mapping of person -> mapping of item -> numeric rating.
        person1: Key of the first person in ``prefs``.
        person2: Key of the second person in ``prefs``.

    Returns:
        ``0`` when the two people have no rated items in common; otherwise
        ``1 / (1 + sum of squared rating differences over shared items)``,
        a float in ``(0, 1]`` where ``1.0`` means identical ratings.

    Raises:
        KeyError: If ``person1`` or ``person2`` is not a key of ``prefs``.
    """
    ratings1 = prefs[person1]
    ratings2 = prefs[person2]

    # Collect the shared items once and reuse them for the sum below.
    shared_items = [item for item in ratings1 if item in ratings2]

    # If they have no ratings in common, return 0.
    if not shared_items:
        return 0

    # Add up the squares of all the differences.
    sum_of_squares = sum(
        (ratings1[item] - ratings2[item]) ** 2 for item in shared_items
    )

    # 1.0 forces true division so integer ratings do not truncate the
    # score to 0 on interpreters where ``/`` is integer division.
    return 1.0 / (1 + sum_of_squares)

皮尔逊相似度

# Returns the Pearson correlation coefficient for p1 and p2
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation coefficient between two people's ratings.

    Only items rated by both people are considered.

    Args:
        prefs: Mapping of person -> mapping of item -> numeric rating.
        p1: Key of the first person in ``prefs``.
        p2: Key of the second person in ``prefs``.

    Returns:
        ``0`` when there are no mutually rated items or when the
        denominator is zero (either person's shared ratings have no
        variance); otherwise the correlation ``r`` as a float.

    Raises:
        KeyError: If ``p1`` or ``p2`` is not a key of ``prefs``.
    """
    # Imported locally so the function does not rely on a module-level
    # import of sqrt being present.
    from math import sqrt

    ratings1 = prefs[p1]
    ratings2 = prefs[p2]

    # Get the list of mutually rated items.
    shared_items = [item for item in ratings1 if item in ratings2]

    # If there are no ratings in common, return 0.
    if not shared_items:
        return 0

    # float() keeps the divisions below exact for integer ratings even
    # where ``/`` is integer division.
    n = float(len(shared_items))

    # Sums of all the preferences.
    sum1 = sum(ratings1[item] for item in shared_items)
    sum2 = sum(ratings2[item] for item in shared_items)

    # Sums of the squares.
    sum1_sq = sum(ratings1[item] ** 2 for item in shared_items)
    sum2_sq = sum(ratings2[item] ** 2 for item in shared_items)

    # Sum of the products.
    product_sum = sum(ratings1[item] * ratings2[item] for item in shared_items)

    # Calculate r (Pearson score).
    numerator = product_sum - (sum1 * sum2 / n)
    variance_product = (sum1_sq - sum1 ** 2 / n) * (sum2_sq - sum2 ** 2 / n)

    # The product is non-negative mathematically; rounding can push it just
    # below zero, which would make sqrt() raise. Treat that like a zero
    # denominator.
    if variance_product <= 0:
        return 0

    return numerator / sqrt(variance_product)

Tanimoto系数

# Based on the ratio of the intersection to the union
def tanamoto(v1, v2):
    """Return the Tanimoto distance between two equal-length vectors.

    An element counts as "present" when it is non-zero. The result is
    ``1.0 - shared / (count1 + count2 - shared)``, i.e. one minus the ratio
    of the intersection to the union, so ``0.0`` means identical presence
    patterns and ``1.0`` means nothing in common.

    Args:
        v1: Indexable sequence of numbers.
        v2: Indexable sequence of numbers, at least as long as ``v1``.
            Elements beyond ``len(v1)`` are ignored.

    Returns:
        A float in ``[0.0, 1.0]``. When neither vector has a non-zero
        element the union is empty and ``0.0`` is returned instead of
        dividing by zero.

    Raises:
        IndexError: If ``v2`` is shorter than ``v1``.
    """
    count1 = count2 = shared = 0
    for i, a in enumerate(v1):
        b = v2[i]
        if a != 0:
            count1 += 1  # in v1
        if b != 0:
            count2 += 1  # in v2
        if a != 0 and b != 0:
            shared += 1  # in both

    union = count1 + count2 - shared
    if union == 0:
        # Neither vector has any non-zero entry; they are treated as
        # identical rather than raising ZeroDivisionError.
        return 0.0

    return 1.0 - (float(shared) / union)

0 0