Python数据挖掘入门与实践一:计算支持度和置信度

来源:互联网 发布:免费u盘数据恢复 编辑:程序博客网 时间:2024/06/05 19:25

import numpy as np
from collections import defaultdict


# First, count how many rows satisfy our premise: that a person is buying apples
'''num_apple_purchases=0
for sample in X:
    if sample[3]==1:    #this person bought apples
        num_apple_purchases+=1
print num_apple_purchases'''




def calS(X,n_features):
    """Compute support and confidence for every pairwise purchase rule.

    Every row of ``X`` is one purchase record and every column is one
    product; a non-zero entry marks the product as bought in that record.
    A rule is a ``(premise, conclusion)`` pair of distinct column indices,
    read as "a record that contains the premise also contains the
    conclusion".

    Args:
        X: Iterable of rows, each indexable by column number
            (for example a 2-D numpy array or a list of lists).
        n_features: Number of product columns to consider in each row.

    Returns:
        A ``(support, confidence)`` tuple of defaultdicts keyed by
        ``(premise, conclusion)``:

        * ``support[rule]`` is the number of rows where the premise is
          non-zero and the conclusion equals 1.
        * ``confidence[rule]`` is that count divided by the number of rows
          in which the premise is non-zero.

        Rules that never held in any row have no entry in either mapping.
    """
    valid_rules = defaultdict(int)
    num_occurances = defaultdict(int)
    for sample in X:
        # Walk every column as a premise; the column count comes from the
        # caller rather than a fixed constant so any dataset width works.
        for premise in range(n_features):
            if sample[premise] == 0:
                continue
            num_occurances[premise] += 1
            for conclusion in range(n_features):
                if premise == conclusion:
                    continue
                if sample[conclusion] == 1:
                    valid_rules[(premise, conclusion)] += 1
    support = valid_rules
    confidence = defaultdict(float)
    for rule, count in valid_rules.items():
        # float() keeps the division exact on Python 2, where int / int
        # would truncate. A rule in valid_rules implies its premise was
        # seen at least once, so the divisor is never zero.
        confidence[rule] = float(count) / num_occurances[rule[0]]
    return support, confidence


def print_rule(premise,conclusion,support,confidence,features):
    """Print one rule with its support and confidence in readable form.

    Args:
        premise: Index of the premise product in ``features``.
        conclusion: Index of the conclusion product in ``features``.
        support: Mapping from ``(premise, conclusion)`` to the support value.
        confidence: Mapping from ``(premise, conclusion)`` to the confidence
            value; it is printed with three decimal places.
        features: Sequence of product names indexed by column number.
    """
    rule = (premise, conclusion)
    bought = features[premise]
    also_bought = features[conclusion]

    headline = "Rule:If a person buys {0} they will also buy {1}"
    print(headline.format(bought, also_bought))

    # Each value is looked up right before its line is printed, so a
    # missing key still fails only after the earlier lines are out.
    rule_support = support[rule]
    print("-Support:{0}".format(rule_support))
    rule_confidence = confidence[rule]
    print("-Confidence:{0:.3f}".format(rule_confidence))


if __name__ == '__main__':
    # Script entry point: load the purchase matrix, compute support and
    # confidence for every pairwise rule, then report one example rule.
    X=np.loadtxt("affinity_dataset.txt")
    # Only the column count is needed; it is passed to calS as n_features.
    n_features=X.shape[1]
    # Column names, in the same order as the columns of the dataset.
    features = ["bread", "milk", "cheese", "apples", "bananas"]
    # Example rule to report: index 1 is "milk", index 3 is "apples".
    premise=1
    conclusion=3
    support,confidence=calS(X,n_features)
    # One formatted print() call emits "support confidence" separated by a
    # space on both Python 2 and Python 3.
    print("{0} {1}".format(support,confidence))
    print_rule(premise,conclusion,support,confidence,features)
0 0
原创粉丝点击