python apriori
来源:互联网 发布:淘宝可以延长几天收货 编辑:程序博客网 时间:2024/06/15 11:16
小修改+注释
"""# Python 2.7# Filename: apriori.py# Author: llhthinker# Email: hangliu56[AT]gmail[DOT]com# Blog: http://www.cnblogs.com/llhthinker/p/6719779.html# Date: 2017-04-16""""""data_set = list[list[]]L = list[set(frozenset())]support_data = dic{frozenset()} = value (support count)C1 = set(frozenset())Lk = set(frozenset())item_count = dic{frozenset()}Lksub1 = set(frozenset())Ck_item = frozenset()Ck = set(frozenset())"""#return a list(list)def load_data_set(): """ Load a sample data set (From Data Mining: Concepts and Techniques, 3th Edition) Returns: A data set: A list of transactions. Each transaction contains several items. """ data_set = [['s1', 's2', 's5'], ['s2', 's4'], ['s2', 's3'], ['s1', 's2', 's4'], ['s1', 's3'], ['s2', 's3'], ['s1', 's3'], ['s1', 's2', 's3', 's5'], ['s1', 's2', 's3']] """ the type of the data_set is list of list----------------------------------------------- """ return data_set#return a set(frozenset)def create_C1(data_set): """ Create frequent candidate 1-itemset C1 by scaning data_set. Args: data_set: A list of transactions. Each transaction contains several items. Returns: C1: A set which contains all frequent candidate 1-itemsets """ """ The explain of frozenset :http://www.cnblogs.com/panwenbin-logs/p/5519617.html """ C1 = set() for t in data_set: for item in t: item_set = frozenset([item]) #print(type(item_set),item_set) C1.add(item_set) #print(C1) return C1#return a bool -> just judge **step of pruning**def is_apriori(Ck_item, Lksub1): """ Judge whether a frequent candidate k-itemset satisfy Apriori property. Args: Ck_item: a frequent candidate k-itemset in Ck which contains all frequent candidate k-itemsets. Lksub1: Lk-1, a set which contains all frequent candidate (k-1)-itemsets. Returns: True: satisfying Apriori property. False: Not satisfying Apriori property. """ for item in Ck_item: #Ck_item is only frozenset which contains only one element(set). #print("aaa") #print(item) #str #print('bbb') #print(Ck_item) #<class 'frozenset'> #print(type(Ck_item)) #print("origin") #print(Ck_item) sub_Ck = Ck_item - frozenset([item]) #sub_Ck is (k-1)-itemsets #print("after pruning") #print(sub_Ck) if sub_Ck not in Lksub1: #print("xxx") #print(sub_Ck) return False return True#return a set(frozenset()) **step of connection**def create_Ck(Lksub1, k): """ Create Ck, a set which contains all all frequent candidate k-itemsets by Lk-1's own connection operation. Args: Lksub1: Lk-1, a set which contains all frequent candidate (k-1)-itemsets. k: the item number of a frequent itemset. Return: Ck: a set which contains all all frequent candidate k-itemsets. """ Ck = set() len_Lksub1 = len(Lksub1) #the numbers of the (k-1)-itemsets #print(len_Lksub1) list_Lksub1 = list(Lksub1) #transform (k-1)-itemsets of the set into list #print(list_Lksub1) for i in range(len_Lksub1): for j in range(i+1, len_Lksub1): l1 = list(list_Lksub1[i]) #list of the list l2 = list(list_Lksub1[j]) l1.sort() l2.sort() #print(l1) #print(l2) if l1[0:k-2] == l2[0:k-2]: Ck_item = list_Lksub1[i] | list_Lksub1[j] #connecting list( two (k-1)-itemsets ) #print("xxx") #print(Ck_item) #print(list_Lksub1) -------------- #print(type(Ck_item)) #print(type(list_Lksub1)) #process -> list_Lk = list_1 | list_2 -> tranform list_LK into Ck_item #else pruning if is_apriori(Ck_item, Lksub1): Ck.add(Ck_item) #print(Ck) #print(type(Ck)) return Ck#return a set(frozenset) **scaning the data set**def generate_Lk_by_Ck(data_set, Ck, min_support, support_data): """ Generate Lk by executing a delete policy from Ck. Args: data_set: A list of transactions. Each transaction contains several items. Ck: A set which contains all all frequent candidate k-itemsets. min_support: The minimum support. support_data: A dictionary. The key is frequent itemset and the value is support. Returns: Lk: A set which contains all all frequent k-itemsets. """ Lk = set() item_count = {} for t in data_set: # t represent a transation for item in Ck: #item represent a candidate k-itemsets """ print(type(item)) class->frozenset print(item) ->frozenset({'l2'}),which can be the key of the dictionary print(type(t)) class->list print(t) [lx,lx,...lx] """ if item.issubset(t): # the set of item is the subset of the list of t #print("Yes") if item not in item_count: item_count[item] = 1 else: item_count[item] += 1 # else: # print("No") t_num = float(len(data_set)) # total numbers of transations for item in item_count: if (item_count[item] / t_num) >= min_support: Lk.add(item) #print(Lk) support_data[item] = item_count[item] #/ t_num return Lk#return L = list(set(frozenset)) , support_data = dic()def generate_L(data_set, k, min_support): """ Generate all frequent itemsets. Args: data_set: A list of transactions. Each transaction contains several items. k: Maximum number of items for all frequent itemsets. min_support: The minimum support. Returns: L: The list of Lk. support_data: A dictionary. The key is frequent itemset and the value is support. """ support_data = {} C1 = create_C1(data_set) L1 = generate_Lk_by_Ck(data_set, C1, min_support, support_data) Lksub1 = L1.copy() #print(Lksub1) L = [] L.append(Lksub1) #print(L) for i in range(2, k+1): Ci = create_Ck(Lksub1, i) Li = generate_Lk_by_Ck(data_set, Ci, min_support, support_data) Lksub1 = Li.copy() L.append(Lksub1) #every time append a set(frozenset) where contain k-itemsets return L, support_datadef generate_big_rules(L, support_data, min_conf): """ Generate big rules from frequent itemsets. Args: L: The list of Lk. support_data: A dictionary. The key is frequent itemset and the value is support. min_conf: Minimal confidence. Returns: big_rule_list: A list which contains all big rules. Each big rule is represented as a 3-tuple. """ big_rule_list = [] sub_set_list = [] for i in range(0, len(L)): for freq_set in L[i]: for sub_set in sub_set_list: if sub_set.issubset(freq_set): conf = support_data[freq_set] / support_data[freq_set - sub_set] big_rule = (freq_set - sub_set, sub_set, conf) if conf >= min_conf and big_rule not in big_rule_list: # print freq_set-sub_set, " => ", sub_set, "conf: ", conf big_rule_list.append(big_rule) sub_set_list.append(freq_set) return big_rule_listif __name__ == "__main__": """ Test """ data_set = load_data_set() #load data L, support_data = generate_L(data_set, k=3, min_support=0.2) for Lk in L: print ("="*50) print ("frequent " + str(len(list(Lk)[0])) + "-itemsets\t\tsupport") print ("="*50) for freq_set in Lk: print (freq_set, support_data[freq_set]) print () """ big_rules_list = generate_big_rules(L, support_data, min_conf=0.7) print ("Big Rules") for item in big_rules_list: print (item[0], "=>", item[1], "conf: ", item[2]) """
阅读全文
0 0
- python apriori
- Python 实现Apriori算法
- Apriori的Python实现
- apriori算法 python实现
- python orange Apriori
- Apriori算法的Python实现!
- Apriori算法的Python实现
- Apriori算法的python实现
- 基于python 的Apriori算法
- Learning Apriori Algorithm - in Python
- Apriori算法的python实现
- Apriori
- Apriori
- Apriori
- Apriori
- Apriori
- apriori
- Apriori
- webpack入门
- linux node环境配置
- 用jquery简单实现弹幕效果
- 【GDOI2018模拟9.23】动态图
- 点亮LED灯
- python apriori
- 随(rand) (概率dp)
- request参数绑定
- [深度学习] (2):实现简单的线性回归(使用TensorFlow优化)
- ubuntu16.04 64位安装tensorflow+cuda8.0+cudnn7.0
- libuv的浅薄理解
- centos6 nat路由转发
- ubuntu中使用apt-get安装zbar
- 原型模式