Python 实现朴素贝叶斯分类器

来源:互联网 发布:php取数组中的最大值 编辑:程序博客网 时间:2024/09/21 06:34

Python 实现(高斯)朴素贝叶斯分类器的主要代码


# Gaussian Naive Bayes classifier for numeric CSV data (Python 3).
#
# Each row of the data set is a list of floats whose LAST element is the
# class label.  Training reduces every class to a (mean, stdev) pair per
# attribute; prediction picks the class whose product of per-attribute
# Gaussian densities is largest.
import csv
import math
import random


def loadCsv(filename):
    """Read a headerless, all-numeric CSV file into a list of float rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each row a list of floats. Blank lines are skipped.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a cell cannot be converted to float.
    """
    # Text mode with newline="" is what the csv module expects on Python 3;
    # the context manager guarantees the handle is closed.
    with open(filename, newline="") as handle:
        return [[float(x) for x in row] for row in csv.reader(handle) if row]


def splitDataset(dataset, splitRatio):
    """Randomly split ``dataset`` into a training part and a test part.

    Args:
        dataset: Sequence of rows. It is not modified.
        splitRatio: Fraction of rows that go into the training set.

    Returns:
        ``[trainSet, testSet]`` where ``trainSet`` holds
        ``int(len(dataset) * splitRatio)`` randomly drawn rows (capped at
        the data set size) and ``testSet`` holds the remaining rows.
    """
    # Cap the size so a ratio above 1 cannot ask for more rows than exist.
    trainSize = min(int(len(dataset) * splitRatio), len(dataset))
    trainSet = []
    remaining = list(dataset)  # work on a copy; rows are popped from it
    while len(trainSet) < trainSize:
        index = random.randrange(len(remaining))
        trainSet.append(remaining.pop(index))
    return [trainSet, remaining]


# Backward-compatible alias for the original (misspelled) public name.
spiltDataset = splitDataset


def separateByClass(dataset):
    """Group rows by their class label (the last element of each row).

    Returns:
        A dict mapping each class label to the list of rows carrying it,
        in their original order.
    """
    separated = {}
    for vector in dataset:
        separated.setdefault(vector[-1], []).append(vector)
    return separated


def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    Raises:
        ZeroDivisionError: If ``numbers`` is empty.
    """
    return sum(numbers) / float(len(numbers))


def stdev(numbers):
    """Return the sample standard deviation (N - 1 denominator).

    A single value has no observable spread, so 0.0 is returned for it
    instead of dividing by zero.

    Raises:
        ZeroDivisionError: If ``numbers`` is empty (raised by ``mean``).
    """
    avg = mean(numbers)
    if len(numbers) < 2:
        return 0.0
    variance = sum((x - avg) ** 2 for x in numbers) / float(len(numbers) - 1)
    return math.sqrt(variance)


def summarize(dataset):
    """Return one ``(mean, stdev)`` tuple per attribute column.

    The last column is the class label and is left out of the summary.
    """
    # zip(*rows) transposes rows into columns; [:-1] drops the label column.
    attributes = list(zip(*dataset))[:-1]
    return [(mean(attribute), stdev(attribute)) for attribute in attributes]


def summarizeByClass(dataset):
    """Return ``{classValue: summarize(rows of that class)}`` for ``dataset``."""
    separated = separateByClass(dataset)
    return {
        classValue: summarize(instances)
        for classValue, instances in separated.items()
    }


def calculateProbability(x, mean, stdev):
    """Evaluate the Gaussian density N(mean, stdev**2) at ``x``.

    For ``stdev == 0`` the density is undefined; this function then returns
    1.0 when ``x`` equals ``mean`` and 0.0 otherwise, so a constant
    attribute does not crash the classifier.
    """
    if stdev == 0:
        return 1.0 if x == mean else 0.0
    # exp(-(x - mean)^2 / (2 * stdev^2)): the denominator is a product.
    exponent = math.exp(-((x - mean) ** 2) / (2 * stdev ** 2))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent


def calculateClassProbabilities(summaries, inputVector):
    """Score ``inputVector`` against every class in ``summaries``.

    Args:
        summaries: ``{classValue: [(mean, stdev), ...]}`` as produced by
            ``summarizeByClass``.
        inputVector: Attribute values; elements beyond the number of
            summarised attributes (e.g. a trailing label) are ignored.

    Returns:
        ``{classValue: product of per-attribute Gaussian densities}``.
    """
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i, (mu, sigma) in enumerate(classSummaries):
            probabilities[classValue] *= calculateProbability(
                inputVector[i], mu, sigma
            )
    return probabilities


# Backward-compatible alias for the original (misspelled) public name.
calcelateClassProbabilities = calculateClassProbabilities


def predict(summaries, inputVector):
    """Return the class label with the highest score for ``inputVector``.

    Ties go to the class encountered first. Returns ``None`` when
    ``summaries`` is empty.
    """
    probabilities = calculateClassProbabilities(summaries, inputVector)
    if not probabilities:
        return None
    # max() keeps the first maximal key, matching a strict ">" scan.
    return max(probabilities, key=probabilities.get)


def getPredictions(summaries, testSet):
    """Return the list of predicted labels, one per row of ``testSet``."""
    return [predict(summaries, row) for row in testSet]


def getAccuracy(testSet, predictions):
    """Return the percentage (0-100) of rows whose label was predicted.

    The true label of a row is its last element. An empty ``testSet``
    yields 0.0 rather than a division by zero.
    """
    if not testSet:
        return 0.0
    correct = sum(
        1 for row, predicted in zip(testSet, predictions)
        if row[-1] == predicted
    )
    return (correct / float(len(testSet))) * 100.0


def main():
    """Train on 67% of the Pima Indians diabetes CSV and print the accuracy."""
    filename = 'pima-indians-diabetes.data.csv'
    splitRatio = 0.67
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into train={1} and test={2} rows'.format(
        len(dataset), len(trainingSet), len(testSet)))
    # prepare model
    summaries = summarizeByClass(trainingSet)
    # test model
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy: {0}%'.format(accuracy))


# Run only when executed as a script, so importing the module has no
# side effects (no file access, no printing).
if __name__ == "__main__":
    main()


原创粉丝点击