推荐算法(userBased, itemBased)

来源:互联网 发布:点网络连接没反应 编辑:程序博客网 时间:2024/06/15 23:04

#!/usr/bin/python
"""User-based and item-based collaborative-filtering recommenders.

Ratings are held in nested dicts of the form ``{outer_id: {inner_id: rate}}``.
``genUserBasedMap`` builds ``{user: {item: rate}}`` and ``genItemBasedMap``
builds ``{item: {user: rate}}`` from a tab-separated ratings file.
"""
from math import sqrt


# ---------------------------------------------------------------------------
# Similarity functions.
#
# These are defined first on purpose: distance(), userBased() and itemBased()
# use them as default argument values, and Python evaluates defaults when the
# ``def`` statement runs, so the names must already exist at that point.
# ---------------------------------------------------------------------------

def euclidean(v1, v2):
    """Return a similarity in (0, 1] derived from the squared distance.

    The vectors are compared pairwise up to the length of the shorter one.
    The result is ``1 / (1 + sum((a - b) ** 2))``, so identical vectors
    score 1.0 and the score shrinks as the vectors move apart.
    Returns 0 when there is nothing to compare.
    """
    pairs = list(zip(v1, v2))
    if not pairs:
        return 0

    squared = sum((a - b) ** 2 for a, b in pairs)
    return 1 / float(1 + squared)


def cosine(v1, v2):
    """Return the cosine of the angle between ``v1`` and ``v2``.

    The vectors are compared pairwise up to the length of the shorter one.
    Returns 0 when there is nothing to compare or when either vector has
    zero length (the cosine is undefined in that case).
    """
    pairs = list(zip(v1, v2))
    if not pairs:
        return 0

    dot = sum(a * b for a, b in pairs)
    norm1 = sum(a * a for a, _ in pairs)  # squared modulus of v1
    norm2 = sum(b * b for _, b in pairs)  # squared modulus of v2
    if norm1 == 0 or norm2 == 0:
        return 0
    return dot / (sqrt(norm1) * sqrt(norm2))


def pearson(v1, v2):
    """Return the Pearson correlation coefficient of ``v1`` and ``v2``.

    The vectors are compared pairwise up to the length of the shorter one.
    Returns 0 when there is nothing to compare or when either vector has
    zero variance (the coefficient is undefined in that case).
    """
    pairs = list(zip(v1, v2))
    if not pairs:
        return 0

    count = float(len(pairs))  # float so the divisions below are true division
    mean1 = sum(a for a, _ in pairs) / count
    mean2 = sum(b for _, b in pairs) / count

    cov = 0   # covariance of v1 and v2
    var1 = 0  # variance of v1
    var2 = 0  # variance of v2
    for a, b in pairs:
        diff1 = a - mean1
        diff2 = b - mean2
        cov += diff1 * diff2
        var1 += diff1 * diff1
        var2 += diff2 * diff2
    cov /= count
    var1 /= count
    var2 /= count

    if var1 == 0 or var2 == 0:
        return 0
    return cov / sqrt(var1 * var2)


# ---------------------------------------------------------------------------
# Loading the rating data.
# ---------------------------------------------------------------------------

def _loadRatings(file, byItem):
    """Parse a tab-separated ``user, item, rate[, ...]`` file into nested dicts.

    Only the first three columns are used. When ``byItem`` is false the
    result is ``{user: {item: rate}}``; otherwise ``{item: {user: rate}}``.
    Blank lines are skipped; a line with fewer than three columns or a
    non-integer field raises ``ValueError``.
    """
    ratings = {}
    # ``with`` guarantees the file is closed even if a line fails to parse.
    with open(file) as f:
        for line in f:
            if not line.strip():
                continue
            user, item, rate = line.split('\t')[0:3]
            user, item = int(user), int(item)
            if byItem:
                outer, inner = item, user
            else:
                outer, inner = user, item
            ratings.setdefault(outer, {})[inner] = int(rate)
    return ratings


def genUserBasedMap(file='u.data'):
    """Load ``file`` and return the ratings as ``{user: {item: rate}}``."""
    return _loadRatings(file, False)


def genItemBasedMap(file='u.data'):
    """Load ``file`` and return the ratings as ``{item: {user: rate}}``."""
    return _loadRatings(file, True)


# ---------------------------------------------------------------------------
# Recommenders.
# ---------------------------------------------------------------------------

def distance(map, p1, p2, similarity=cosine):
    """Return the similarity of rows ``p1`` and ``p2`` of ``map``.

    Only the inner keys present in both rows are compared. ``similarity`` is
    called with the two aligned lists of ratings. Returns 0 when the rows
    have no key in common.
    """
    shared = [key for key in map[p1] if key in map[p2]]
    if not shared:
        return 0

    v1 = [map[p1][key] for key in shared]
    v2 = [map[p2][key] for key in shared]
    return similarity(v1, v2)


def userBased(map, person, n=5, similarity=pearson):
    """Recommend up to ``n`` items for ``person`` from a user-based map.

    ``map`` is ``{user: {item: rate}}``. Every other user whose similarity
    to ``person`` is positive contributes their ratings for items that
    ``person`` has not rated, weighted by that similarity. Each item's
    weighted total is then divided by the sum of the similarities that
    contributed to it.

    Returns a list of ``(predicted_rate, item)`` tuples, highest first.
    """
    totals = {}   # item -> sum of similarity * rate
    simSums = {}  # item -> sum of similarity, used to normalize the total
    for other in map:
        if other == person:
            continue
        score = distance(map, other, person, similarity)
        if score <= 0:
            # Dissimilar or unrelated users carry no weight.
            continue
        for item, rate in map[other].items():
            if item == 0 or item is None or item in map[person]:
                continue
            totals[item] = totals.get(item, 0) + score * rate
            simSums[item] = simSums.get(item, 0) + score

    # Normalize so items rated by many similar users are not favoured merely
    # because more terms were added up.
    rankings = [(total / simSums[item], item) for item, total in totals.items()]
    return sorted(rankings, reverse=True)[0:n]


def itemBased(map, item, n=5, similarity=pearson):
    """Return the ``n`` items most similar to ``item`` from an item-based map.

    ``map`` is ``{item: {user: rate}}``. Returns a list of
    ``(similarity, other_item)`` tuples, highest similarity first.
    """
    scores = [(distance(map, item, other, similarity), other)
              for other in map if other != item]
    return sorted(scores, reverse=True)[0:n]


原创粉丝点击