coursera公开课——recommender system作业(第三周)

来源:互联网 发布:mysql 联合主键 编辑:程序博客网 时间:2024/05/03 22:53

懒虫锅~~~!!!!
原始数据:
data1
以及用户目前对各个doc的评分数据(未评分的单元格为空):
userdata

# coding: utf-8
"""Content-based recommender script (Python 3).

Reads a document/attribute matrix from ``data1.csv`` and per-user ratings
from ``userdata.csv``, builds a profile for every user and prints the
best-scoring documents for one user.
"""
import csv
import math
import operator


def _read_rows(path):
    """Return the non-blank rows of the CSV file at *path* as lists of strings."""
    # newline="" leaves line-ending handling to the csv module (replacement
    # for the Python 2-only file(path, 'rU')); `with` closes the file even
    # when parsing fails.
    with open(path, newline="") as handle:
        return [row for row in csv.reader(handle, dialect="excel") if row]


def _load_documents(path):
    """Load the document/attribute matrix.

    The first row is the header (attribute names); in every other row the
    first cell is the document id and the remaining cells are the values
    of the attributes named in the header.

    Returns:
        (header, documents) where ``documents[doc_id][attribute]`` is the
        raw cell string.  A short row raises IndexError.
    """
    rows = _read_rows(path)
    if not rows:
        return [], {}
    header = list(rows[0])
    documents = {}
    for row in rows[1:]:
        attrs = documents.setdefault(row[0], {})
        for idx in range(1, len(header)):
            # setdefault: the first occurrence wins on duplicate ids/names.
            attrs.setdefault(header[idx], row[idx])
    return header, documents


def _load_ratings(path):
    """Load the user ratings.

    The first row is the header (one user name per column); in every other
    row the first cell is the document id and the remaining cells are that
    user's rating for the document.

    Returns:
        (header, ratings) where ``ratings[user][doc_id]`` is the raw cell
        string; blank cells are stored as ``"0"``.
    """
    rows = _read_rows(path)
    if not rows:
        return [], {}
    header = list(rows[0])
    ratings = {}
    for row in rows[1:]:
        for idx in range(1, len(row)):
            # A blank cell becomes "0" so it adds nothing to the profile sums.
            value = row[idx] or "0"
            ratings.setdefault(header[idx], {}).setdefault(row[0], value)
    return header, ratings


def calDF(doc, attributes=None):
    """Return the document frequency (DF) of every attribute.

    Args:
        doc: mapping ``doc_id -> {attribute: value}``; every value must be
            accepted by ``int()``.
        attributes: attribute names to count; empty names are skipped
            (presumably the blank header cell above the id column).
            Defaults to the module-level ``item`` list.

    Returns:
        dict ``attribute -> sum of that attribute over all documents``.
    """
    if attributes is None:
        attributes = item
    return {
        name: sum(int(doc[d][name]) for d in doc)
        for name in attributes
        if name != ""
    }


def numattr(doc):
    """Return ``doc_id -> sum of the document's attribute values``."""
    return {d: sum(int(value) for value in doc[d].values()) for d in doc}


def UserProfiles(User_Current, doc, attributes=None, attr_counts=None):
    """Build the normalised attribute profile of every user.

    Each rated document contributes ``value * rating / sqrt(count)`` to an
    attribute, where ``count`` is the document's attribute total.

    Args:
        User_Current: mapping ``user -> {doc_id: rating}``.
        doc: mapping ``doc_id -> {attribute: value}``.
        attributes: attribute names (empty names skipped); defaults to the
            module-level ``item`` list.
        attr_counts: mapping ``doc_id -> attribute total``; defaults to the
            module-level ``totalcount``.

    Returns:
        dict ``user -> {attribute: float weight}``.
    """
    if attributes is None:
        attributes = item
    if attr_counts is None:
        attr_counts = totalcount
    result = {}
    for person, ratings in User_Current.items():
        profile = result.setdefault(person, {})
        for name in attributes:
            if name == "":
                continue
            s = 0.0
            for d, rating in ratings.items():
                count = int(attr_counts[d])
                if count == 0:
                    # A document without attributes is a zero vector: it
                    # contributes nothing (and would divide by zero).
                    continue
                s = s + 1.0 / math.sqrt(count) * float(doc[d][name]) * float(rating)
            profile.setdefault(name, s)
    return result


def dotProduct(person, userprofile, doc, attr_counts=None):
    """Score every document by the dot product with *person*'s profile.

    The sum over attributes is divided by ``sqrt(attribute total)`` of the
    document; a document with no attributes scores 0.0.

    Args:
        person: key into *userprofile*.
        userprofile: mapping ``user -> {attribute: weight}``.
        doc: mapping ``doc_id -> {attribute: value}``.
        attr_counts: mapping ``doc_id -> attribute total``; defaults to the
            module-level ``totalcount``.

    Returns:
        dict ``doc_id -> score``.
    """
    if attr_counts is None:
        attr_counts = totalcount
    result = {}
    for d, attrs in doc.items():
        s = 0
        for name, value in attrs.items():
            s = s + int(value) * float(userprofile[person][name])
        count = int(attr_counts[d])
        result[d] = s * 1.0 / math.sqrt(count) if count else 0.0
    return result


def IDF(person, userprofile, doc, doc_freq=None, attr_counts=None):
    """Score every document as profile * document vector * 1/DF.

    Args:
        person: key into *userprofile*.
        userprofile: mapping ``user -> {attribute: weight}``.
        doc: mapping ``doc_id -> {attribute: value}``.
        doc_freq: mapping ``attribute -> document frequency``; defaults to
            the module-level ``df``.
        attr_counts: mapping ``doc_id -> attribute total``; defaults to the
            module-level ``totalcount``.

    Returns:
        dict ``doc_id -> score``.
    """
    if doc_freq is None:
        doc_freq = df
    if attr_counts is None:
        attr_counts = totalcount
    result = {}
    for d, attrs in doc.items():
        s = 0.0
        for name, value in attrs.items():
            value = int(value)
            if value == 0:
                # Zero entries add nothing; skipping them also avoids
                # dividing by a document frequency of zero.
                continue
            s = s + value * float(userprofile[person][name]) * 1.0 / int(doc_freq[name])
        count = int(attr_counts[d])
        result[d] = s * 1.0 / math.sqrt(count) if count else 0.0
    return result


def topn_dotProduct(dotproducts, n=5):
    """Print the *n* highest-scoring ``(doc_id, score)`` pairs, best first.

    Prints fewer lines when there are fewer than *n* documents.
    """
    ranked = sorted(dotproducts.items(), key=operator.itemgetter(1), reverse=True)
    # max() keeps a negative n from turning into an "all but the last" slice.
    for entry in ranked[:max(n, 0)]:
        print(entry)


if __name__ == "__main__":
    item, doc = _load_documents("data1.csv")
    user, User_Current = _load_ratings("userdata.csv")
    df = calDF(doc)
    totalcount = numattr(doc)
    userprofile = UserProfiles(User_Current, doc)
    dotproducts = dotProduct('User 1', userprofile, doc)
    Idf = IDF('User 2', userprofile, doc)
    topn_dotProduct(Idf, 20)

问题一:

# Question 1: unnormalised profiles and plain dot-product scores.
# Relies on `item`, `doc`, `User_Current` and the `operator` import from the
# main script.


def UserProfiles(User_Current, doc, attributes=None):
    """Build every user's attribute profile without length normalisation.

    Args:
        User_Current: mapping ``user -> {doc_id: rating}``.
        doc: mapping ``doc_id -> {attribute: value}``.
        attributes: attribute names (empty names are skipped); defaults to
            the module-level ``item`` list.

    Returns:
        dict ``user -> {attribute: sum of value * rating over rated docs}``.
    """
    if attributes is None:
        attributes = item
    result = {}
    for person, ratings in User_Current.items():
        profile = result.setdefault(person, {})
        for name in attributes:
            if name == "":
                continue
            s = 0.0
            for d, rating in ratings.items():
                s = s + float(doc[d][name]) * float(rating)
            profile.setdefault(name, s)
    return result


def dotProduct(person, userprofile, doc):
    """Return ``doc_id -> dot product of the document with person's profile``."""
    result = {}
    for d, attrs in doc.items():
        s = 0
        for name, value in attrs.items():
            s = s + int(value) * float(userprofile[person][name])
        result[d] = s
    return result


def topn_dotProduct(dotproducts, n=5):
    """Print the *n* highest-scoring ``(doc_id, score)`` pairs, best first.

    Prints fewer lines when there are fewer than *n* documents.
    """
    ranked = sorted(dotproducts.items(), key=operator.itemgetter(1), reverse=True)
    # max() keeps a negative n from turning into an "all but the last" slice.
    for entry in ranked[:max(n, 0)]:
        print(entry)


if __name__ == "__main__":
    userprofile = UserProfiles(User_Current, doc)
    dotproducts = dotProduct('User 1', userprofile, doc)
    topn_dotProduct(dotproducts, 20)

问题二:

# Question 2: profiles and scores normalised by sqrt(attribute total).
# Relies on `item`, `totalcount`, `doc`, `User_Current` and the `math` /
# `operator` imports from the main script.


def UserProfiles(User_Current, doc, attributes=None, attr_counts=None):
    """Build every user's attribute profile from length-normalised documents.

    Each rated document contributes ``value * rating / sqrt(count)`` to an
    attribute, where ``count`` is the document's attribute total.

    Args:
        User_Current: mapping ``user -> {doc_id: rating}``.
        doc: mapping ``doc_id -> {attribute: value}``.
        attributes: attribute names (empty names are skipped); defaults to
            the module-level ``item`` list.
        attr_counts: mapping ``doc_id -> attribute total``; defaults to the
            module-level ``totalcount``.

    Returns:
        dict ``user -> {attribute: float weight}``.
    """
    if attributes is None:
        attributes = item
    if attr_counts is None:
        attr_counts = totalcount
    result = {}
    for person, ratings in User_Current.items():
        profile = result.setdefault(person, {})
        for name in attributes:
            if name == "":
                continue
            s = 0.0
            for d, rating in ratings.items():
                count = int(attr_counts[d])
                if count == 0:
                    # A document without attributes is a zero vector: it
                    # contributes nothing (and would divide by zero).
                    continue
                s = s + 1.0 / math.sqrt(count) * float(doc[d][name]) * float(rating)
            profile.setdefault(name, s)
    return result


def dotProduct(person, userprofile, doc, attr_counts=None):
    """Score every document against *person*'s profile.

    The dot product is divided by ``sqrt(attribute total)`` of the
    document; a document with no attributes scores 0.0.  *attr_counts*
    defaults to the module-level ``totalcount``.
    """
    if attr_counts is None:
        attr_counts = totalcount
    result = {}
    for d, attrs in doc.items():
        s = 0
        for name, value in attrs.items():
            s = s + int(value) * float(userprofile[person][name])
        count = int(attr_counts[d])
        result[d] = s * 1.0 / math.sqrt(count) if count else 0.0
    return result


def topn_dotProduct(dotproducts, n=5):
    """Print the *n* highest-scoring ``(doc_id, score)`` pairs, best first.

    Prints fewer lines when there are fewer than *n* documents.
    """
    ranked = sorted(dotproducts.items(), key=operator.itemgetter(1), reverse=True)
    # max() keeps a negative n from turning into an "all but the last" slice.
    for entry in ranked[:max(n, 0)]:
        print(entry)


if __name__ == "__main__":
    userprofile = UserProfiles(User_Current, doc)
    dotproducts = dotProduct('User 1', userprofile, doc)
    topn_dotProduct(dotproducts, 20)

~~~~
遇到的一些问题
直接将表格存为.csv文件后,运行python读取时会报错:
_csv.Error: line contains NULL byte
解决办法:在Excel中使用“另存为”,并把保存类型选为CSV格式。

python字典排序问题

# -*- coding: utf-8 -*-
# Demo: sorting a dict by value and sorting a list of dicts by a key.
# Written to run unchanged on Python 2 and Python 3 (dict.items() and
# single-argument print()).
import operator

# Sort a dict's items by value (ascending by default).
x = {1: 2, 3: 4, 4: 3, 2: 1, 0: 0}
sorted_x = sorted(x.items(), key=operator.itemgetter(1))
print(sorted_x)
# [(0, 0), (2, 1), (1, 2), (4, 3), (3, 4)]

# For descending order pass reverse=True ...
sorted_x = sorted(x.items(), key=operator.itemgetter(1), reverse=True)
print(sorted_x)
# [(3, 4), (4, 3), (1, 2), (2, 1), (0, 0)]
# ... or reverse the ascending result in place with sorted_x.reverse().

# Alternative: a lambda as the key function.  The parameter is not called
# `x`, so it does not shadow the dict being sorted.
sorted_x = sorted(x.items(), key=lambda pair: pair[1])
print(sorted_x)
# [(0, 0), (2, 1), (1, 2), (4, 3), (3, 4)]
sorted_x = sorted(x.items(), key=lambda pair: pair[1], reverse=True)
print(sorted_x)
# [(3, 4), (4, 3), (1, 2), (2, 1), (0, 0)]

# A list of dicts is sorted the same way, keyed on one of the dict entries.
x = [{'name': 'Homer', 'age': 39}, {'name': 'Bart', 'age': 10}]
sorted_x = sorted(x, key=operator.itemgetter('name'))
print(sorted_x)
# [{'age': 10, 'name': 'Bart'}, {'age': 39, 'name': 'Homer'}]
sorted_x = sorted(x, key=operator.itemgetter('name'), reverse=True)
print(sorted_x)
# [{'age': 39, 'name': 'Homer'}, {'age': 10, 'name': 'Bart'}]
sorted_x = sorted(x, key=lambda person: person['name'])
print(sorted_x)
# [{'age': 10, 'name': 'Bart'}, {'age': 39, 'name': 'Homer'}]
sorted_x = sorted(x, key=lambda person: person['name'], reverse=True)
print(sorted_x)
# [{'age': 39, 'name': 'Homer'}, {'age': 10, 'name': 'Bart'}]
0 0