贝叶斯概率

来源:互联网 发布:模拟软件下载 编辑:程序博客网 时间:2024/06/04 23:22

1.原理

贝叶斯分类模型主要从已有数据中找到输入和输入的概率关系,并进行转换,

3.python 实现

from numpy import *def loadDataSet():    postingList=[['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],                 ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],                 ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],                 ['stop', 'posting', 'stupid', 'worthless', 'garbage'],                 ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],                 ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]    classVec = [0,1,0,1,0,1]    #1 is abusive, 0 not    return postingList,classVecdef createVocabList(dataSet):    vocabSet = set([])    for document in dataSet:        vocabSet = vocabSet | set(document)    return list(vocabSet)def setOfWords2Vec(vocabList, inputSet):    returnVec = [0] * len(vocabList)    for word in inputSet:        if word in vocabList:            returnVec[vocabList.index(word)] = 1        else: print 'the word: %s is not in my vocabulary' %word    return returnVecdef trainNB0(trainMatrix, trainCategory):    n = len(trainMatrix)    numWords = len(trainMatrix[0])    pAbusive = sum(trainCategory)/float(n)    p0Num = ones(n)    p1Num = ones(n)    p0Denom = 2.0    p1Denom = 2.0    for i in range(n):        if trainCategory[i] == 1:            p1Num += trainMatrix[i]            p1Denom += sum(trainMatrix[i])        else:            p0Num += trainMatrix[i]            p0Denom += sum(trainMatrix[i])         p1vect = log(p1Num / p1Denom)    p0vect = log(p0Num / p0Denom)    return p0vect, p1vect,pAbusivedef classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):    p1 = sum(vec2Classify * p1Vec) + log(pClass1)    p0 = sum(vec2Classify * p0Vec) + log(1.0 - pClass1)    if p1 > p0:        return 1    else:        return 0


0 0
原创粉丝点击