西瓜书《机器学习》课后答案——chapter7_7.3AODE

来源:互联网 发布:阿里云国外节点 编辑:程序博客网 时间:2024/06/05 06:42

AODE算法的难点在于构建存储计数的数据结构,这里采用三层字典表示P(c,xi),五层字典表示P(xj|c,xi)。由于数据集比较小,我们取m=0。另外,对于连续属性,不知道怎么处理,所以这里只考虑离散属性。

# -*-coding:gbk -*-
# NOTE(review): an encoding declaration is only honoured on the first or
# second line of a file and must match the on-disk encoding -- confirm.
"""
@Author: Victoria
@Date: 2017.10.19 21:30
"""


class AODE(object):
    """Averaged One-Dependent Estimator (AODE) over discrete attributes.

    Class labels are expected to be the integers ``0 .. class_num - 1`` and
    every sample is an indexable sequence of ``d`` hashable attribute values.
    """

    def __init__(self, d, class_num=2):
        # number of discrete features
        self.d = d
        self.class_num = class_num

    def train(self, X, y):
        """
        The training process of AODE is to save estimated joint probability.

        Builds four nested-dict tables and stores them on the instance:

        * ``count_c_xi[c][i][v]``           -- samples of class c with x_i == v
        * ``count_xj_c_xi[c][i][v][j][w]``  -- ... that also have x_j == w
        * ``prob_c_xi[c][i][v]``            -- smoothed estimate of P(c, x_i=v)
        * ``prob_xj_c_xi[c][i][v][j][w]``   -- smoothed P(x_j=w | c, x_i=v)

        X: sequence of samples, each with at least ``self.d`` attribute values.
        y: sequence of integer class labels, one per sample.
        """
        N = len(X)
        d = self.d

        # Distinct values observed for every attribute (first-seen order).
        attrs = []
        for i in range(d):
            values = []
            for n in range(N):
                if X[n][i] not in values:
                    values.append(X[n][i])
            attrs.append(values)

        # Zero-initialise the count tables for every (class, attribute, value)
        # combination so unseen combinations still get a smoothed probability.
        count_c_xi = {}
        count_xj_c_xi = {}
        for c in range(self.class_num):
            count_c_xi[c] = {}
            count_xj_c_xi[c] = {}
            for i in range(d):
                count_c_xi[c][i] = dict.fromkeys(attrs[i], 0)
                count_xj_c_xi[c][i] = {}
                for attr_i in attrs[i]:
                    count_xj_c_xi[c][i][attr_i] = dict(
                        (j, dict.fromkeys(attrs[j], 0)) for j in range(d)
                    )

        # Accumulate counts from the training samples.
        for n in range(N):
            for i in range(d):
                count_c_xi[y[n]][i][X[n][i]] += 1
                for j in range(d):
                    count_xj_c_xi[y[n]][i][X[n][i]][j][X[n][j]] += 1

        # Turn counts into add-one smoothed probability estimates.
        prob_c_xi = {}
        prob_xj_c_xi = {}
        for c in range(self.class_num):
            prob_c_xi[c] = {}
            prob_xj_c_xi[c] = {}
            for i in range(d):
                # the values number of i-th attribution
                v_i = len(attrs[i])
                prob_c_xi[c][i] = {}
                prob_xj_c_xi[c][i] = {}
                for attr_i_value, N_c_xi in count_c_xi[c][i].items():
                    prob_c_xi[c][i][attr_i_value] = (
                        float(N_c_xi + 1) / (N + self.class_num * v_i)
                    )
                    prob_xj_c_xi[c][i][attr_i_value] = {}
                    for j in range(d):
                        v_j = len(attrs[j])
                        cond = {}
                        pair_counts = count_xj_c_xi[c][i][attr_i_value][j]
                        for attr_j_value, N_c_xi_xj in pair_counts.items():
                            cond[attr_j_value] = (
                                float(N_c_xi_xj + 1) / (N_c_xi + v_j)
                            )
                        prob_xj_c_xi[c][i][attr_i_value][j] = cond

        self.count_xj_c_xi = count_xj_c_xi
        self.count_c_xi = count_c_xi
        self.prob_xj_c_xi = prob_xj_c_xi
        self.prob_c_xi = prob_c_xi

    def predict(self, x):
        """Return ``(label, score)`` for one sample ``x``.

        The score of class c is the sum, over every attribute i taken as the
        super-parent, of ``P(c, x_i) * prod_j P(x_j | c, x_i)``; the label is
        the class with the largest score (the first one on ties).

        Raises KeyError if ``x`` holds an attribute value that was not seen
        during training.
        """
        probs = []
        for c in range(self.class_num):
            prob_c = 0.0
            for i in range(self.d):
                # j runs over all attributes, including i itself.
                prob_j_c_i_product = 1.0
                for j in range(self.d):
                    prob_j_c_i_product *= self.prob_xj_c_xi[c][i][x[i]][j][x[j]]
                # Accumulate inside the i-loop so every super-parent
                # contributes, not just the last one.
                prob_c += self.prob_c_xi[c][i][x[i]] * prob_j_c_i_product
            probs.append(prob_c)
        prob = max(probs)
        label = probs.index(prob)
        return label, prob


if __name__ == "__main__":
    # xlrd is only needed to load the demo spreadsheet, so import it here to
    # keep the classifier importable without it.
    import xlrd

    workbook = xlrd.open_workbook("../../数据/3.0.xlsx")
    sheet = workbook.sheet_by_name("Sheet1")
    X = []
    for i in range(17):
        # the first six cells of column i are the attribute values of sample i
        x = [int(value) for value in sheet.col_values(i)[0:6]]
        print(x)
        X.append(x)
    # the row with index 8 holds the class labels
    y = [int(value) for value in sheet.row_values(8)]
    aode = AODE(d=6)
    aode.train(X, y)
    label, prob = aode.predict([1, 1, 1, 1, 1, 1])
    print("the predict label is {} with prob {}".format(label, prob))

预测结果:

the predict label is 1 with prob 0.0186709343088 #预测为正例
阅读全文
0 0