hmm 算法(1)
来源:互联网 发布:java获取天气预报接口 编辑:程序博客网 时间:2024/05/22 06:48
最近研究hmm, 然后想说看看网上写好的代码,结果还有bug~~~我也是醉醉低~~~~
自己稍微改了改,然后把代码贴到这里好了, 感觉囧囧的~~~
之后还会写写自己对这个算法的体验,感觉这个算法确实很有用~~怪说不得翻几本书都能看到它的存在~~~
HMM 代码
# -*- coding: utf-8 -*-
# Copyright (c) 2012, Chi-En Wu
#
# Discrete Hidden Markov Model: supervised initialisation, forward/backward
# evaluation, Viterbi decoding and Baum-Welch re-estimation.
# The code runs unchanged on Python 2.7 and Python 3.

from math import log

try:  # Python 2: keep the lazy zip this module was written against.
    from itertools import izip
except ImportError:  # Python 3: the builtin zip is already lazy.
    izip = zip


def _normalize_prob(prob, item_set):
    """Return a dict giving every item of `item_set` a normalized weight.

    If `prob` is None the weights are uniform. Otherwise the weights found
    in `prob` (missing items count as 0) are divided by their sum; when that
    sum is not positive every item gets 0.
    """
    if prob is None:
        number = len(item_set)
        return {item: 1.0 / number for item in item_set}

    prob_sum = sum((prob.get(item, 0) for item in item_set), 0.0)
    if prob_sum > 0:
        return {item: prob.get(item, 0) / prob_sum for item in item_set}
    return {item: 0.0 for item in item_set}


def _normalize_prob_two_dim(prob, item_set1, item_set2):
    """Normalize a nested dict row by row.

    Every item of `item_set1` gets a row over `item_set2`. Rows missing
    from `prob` (or all rows, if `prob` is None) become uniform.
    """
    if prob is None:
        return {item: _normalize_prob(None, item_set2) for item in item_set1}
    return {item: _normalize_prob(prob.get(item), item_set2)
            for item in item_set1}


def _count(item, count):
    """Increment the counter of `item` in the dict `count` (in place)."""
    count[item] = count.get(item, 0) + 1


def _count_two_dim(item1, item2, count):
    """Increment the counter of the pair (`item1`, `item2`) in `count`."""
    _count(item2, count.setdefault(item1, {}))


def _get_init_model(sequences):
    """Build a Model from relative frequencies in labelled sequences.

    `sequences` is an iterable of `(state_list, symbol_list)` pairs. Start,
    transition and emission counts are collected and handed to `Model`,
    which normalizes them into probabilities.
    """
    symbol_count = {}
    state_count = {}
    state_symbol_count = {}
    state_start_count = {}
    state_trans_count = {}

    for state_list, symbol_list in sequences:
        pre_state = None
        for state, symbol in izip(state_list, symbol_list):
            _count(state, state_count)
            _count(symbol, symbol_count)
            _count_two_dim(state, symbol, state_symbol_count)
            if pre_state is None:
                _count(state, state_start_count)
            else:
                _count_two_dim(pre_state, state, state_trans_count)
            pre_state = state

    return Model(list(state_count), list(symbol_count),
                 state_start_count, state_trans_count, state_symbol_count)


def _log_likelihood(model, symbol_list):
    """Return log P(symbol_list | model).

    Raises ValueError (the type `math.log` itself would raise) with an
    explicit message when the sequence has zero probability.
    """
    prob = model.evaluate(symbol_list)
    if prob <= 0:
        raise ValueError(
            'sequence has zero probability under the current model; '
            'consider training with smoothing > 0')
    return log(prob)


def train(sequences, delta=0.0001, smoothing=0):
    """
    Use the given sequences to train a HMM model.
    This method is an implementation of the `EM algorithm
    <http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm>`_.

    `sequences` is an iterable of `(state_list, symbol_list)` pairs. It is
    materialized into a list because it is traversed once per iteration.

    The `delta` argument (which is defaults to 0.0001) specifies that the
    learning algorithm will stop when the difference of the log-likelihood
    between two consecutive iterations is less than delta.

    The `smoothing` argument is used to avoid zero probability,
    see :py:meth:`~hmm.Model.learn`.

    Raises ValueError if some symbol sequence gets zero probability during
    training, and ZeroDivisionError if `sequences` is empty.
    """
    sequences = list(sequences)
    model = _get_init_model(sequences)
    length = len(sequences)

    old_likelihood = sum(
        _log_likelihood(model, symbol_list) for _, symbol_list in sequences)
    old_likelihood /= length

    while True:
        new_likelihood = 0
        for _, symbol_list in sequences:
            # The model is re-estimated from each sequence in turn and is
            # scored on that same sequence right after the update.
            model.learn(symbol_list, smoothing)
            new_likelihood += _log_likelihood(model, symbol_list)
        new_likelihood /= length

        if abs(new_likelihood - old_likelihood) < delta:
            break
        old_likelihood = new_likelihood

    return model


class Model(object):
    """
    This class is an implementation of the Hidden Markov Model.

    The instance of this class can be created by passing the given states,
    symbols and optional probability matrices. If any of the probability
    matrices are not given, the missing matrices will be set to the initial
    uniform probability.
    """

    def __init__(self, states, symbols, start_prob=None,
                 trans_prob=None, emit_prob=None):
        self._states = set(states)
        self._symbols = set(symbols)
        self._start_prob = _normalize_prob(start_prob, self._states)
        self._trans_prob = _normalize_prob_two_dim(
            trans_prob, self._states, self._states)
        self._emit_prob = _normalize_prob_two_dim(
            emit_prob, self._states, self._symbols)

    def __repr__(self):
        return '{name}({_states}, {_symbols}, {_start_prob}, {_trans_prob}, {_emit_prob})' \
            .format(name=self.__class__.__name__, **self.__dict__)

    def states(self):
        """ Return the state set of this model (a copy). """
        return set(self._states)

    def states_number(self):
        """ Return the number of states. """
        return len(self._states)

    def symbols(self):
        """ Return the symbol set of this model (a copy). """
        return set(self._symbols)

    def symbols_number(self):
        """ Return the number of symbols. """
        return len(self._symbols)

    def start_prob(self, state):
        """
        Return the start probability of the given state.

        If `state` is not contained in the state set of this model,
        0 is returned.
        """
        if state not in self._states:
            return 0
        return self._start_prob[state]

    def trans_prob(self, state_from, state_to):
        """
        Return the probability that transition from state `state_from`
        to state `state_to`.

        If either the `state_from` or the `state_to` are not contained in
        the state set of this model, 0 is returned.
        """
        if state_from not in self._states or state_to not in self._states:
            return 0
        return self._trans_prob[state_from][state_to]

    def emit_prob(self, state, symbol):
        """
        Return the emission probability for `symbol` associated with the
        `state`.

        If either the `state` or the `symbol` are not contained in this
        model, 0 is returned.
        """
        if state not in self._states or symbol not in self._symbols:
            return 0
        return self._emit_prob[state][symbol]

    def _forward(self, sequence):
        """
        Return the forward variables as a list of dicts.

        `alpha[t][state]` is the joint probability of the first `t + 1`
        symbols and of being in `state` at position `t`. An empty sequence
        yields an empty list.
        """
        sequence_length = len(sequence)
        if sequence_length == 0:
            return []

        alpha = [{
            state: self.start_prob(state) * self.emit_prob(state, sequence[0])
            for state in self._states
        }]

        for index in range(1, sequence_length):
            previous = alpha[index - 1]
            current = {}
            for state_to in self._states:
                prob = 0
                for state_from in self._states:
                    prob += previous[state_from] * \
                        self.trans_prob(state_from, state_to)
                current[state_to] = \
                    prob * self.emit_prob(state_to, sequence[index])
            alpha.append(current)

        return alpha

    def backward(self, sequence):
        """
        Return the backward variables as a list of dicts.

        `beta[t][state]` is the probability of the symbols after position
        `t` given that the model is in `state` at position `t`; the last
        entry is 1 for every state. An empty sequence yields an empty list.
        """
        sequence_length = len(sequence)
        if sequence_length == 0:
            return []

        beta = [{state: 1 for state in self._states}]

        for index in range(sequence_length - 1, 0, -1):
            following = beta[0]
            current = {}
            for state_from in self._states:
                prob = 0
                for state_to in self._states:
                    # The recursion sums over the *destination* state, so
                    # the next-step beta must be indexed by state_to.
                    prob += following[state_to] * \
                        self.trans_prob(state_from, state_to) * \
                        self.emit_prob(state_to, sequence[index])
                current[state_from] = prob
            beta.insert(0, current)

        return beta

    def evaluate(self, sequence):
        """
        Use the `forward algorithm
        <http://en.wikipedia.org/wiki/Forward%E2%80%93backward_algorithm>`_
        to evaluate the given sequence.

        Returns the probability of `sequence` under this model, or 0 for
        an empty sequence.
        """
        if len(sequence) == 0:
            return 0

        alpha = self._forward(sequence)
        return sum(alpha[-1].values())

    def decode(self, sequence):
        """
        Decode the given sequence.

        This method is an implementation of the `Viterbi algorithm
        <http://en.wikipedia.org/wiki/Viterbi_algorithm>`_.

        Returns the most probable state list, or an empty list when the
        sequence is empty or has zero probability under this model.
        """
        sequence_length = len(sequence)
        if sequence_length == 0:
            return []

        delta = {
            state: self.start_prob(state) * self.emit_prob(state, sequence[0])
            for state in self._states
        }

        # pre[t - 1][state] is the best predecessor of `state` at step t.
        pre = []
        for index in range(1, sequence_length):
            delta_bar = {}
            pre_state = {}
            for state_to in self._states:
                max_prob = 0
                max_state = None
                for state_from in self._states:
                    prob = delta[state_from] * \
                        self.trans_prob(state_from, state_to)
                    if prob > max_prob:
                        max_prob = prob
                        max_state = state_from
                delta_bar[state_to] = \
                    max_prob * self.emit_prob(state_to, sequence[index])
                pre_state[state_to] = max_state
            delta = delta_bar
            pre.append(pre_state)

        max_state = None
        max_prob = 0
        for state in self._states:
            if delta[state] > max_prob:
                max_prob = delta[state]
                max_state = state

        if max_state is None:
            return []

        # Follow the back-pointers from the best final state.
        result = [max_state]
        for index in range(sequence_length - 1, 0, -1):
            max_state = pre[index - 1][max_state]
            result.insert(0, max_state)

        return result

    def learn(self, sequence, smoothing=0):
        """
        Use the given `sequence` to find the best state transition and
        emission probabilities.

        The optional `smoothing` argument (which is defaults to 0) is the
        smoothing parameter of the `additive smoothing
        <http://en.wikipedia.org/wiki/Additive_smoothing>`_
        to avoid zero probability.

        The model is updated in place; an empty `sequence` leaves it
        untouched. Every symbol of `sequence` must belong to the model,
        otherwise KeyError is raised.
        """
        length = len(sequence)
        if length == 0:
            # Nothing to learn from; the update below needs gamma[0].
            return

        alpha = self._forward(sequence)
        beta = self.backward(sequence)

        # gamma[t][i]: probability of being in state i at position t.
        gamma = []
        for index in range(length):
            weights = {
                state: alpha[index][state] * beta[index][state]
                for state in self._states
            }
            prob_sum = sum(weights.values())
            if prob_sum != 0:
                for state in self._states:
                    weights[state] /= prob_sum
            gamma.append(weights)

        # xi[t][i][j]: probability of the transition i -> j at position t.
        xi = []
        for index in range(length - 1):
            prob_sum = 0
            table = {}
            for state_from in self._states:
                row = {}
                for state_to in self._states:
                    prob = alpha[index][state_from] * \
                        beta[index + 1][state_to] * \
                        self.trans_prob(state_from, state_to) * \
                        self.emit_prob(state_to, sequence[index + 1])
                    row[state_to] = prob
                    prob_sum += prob
                table[state_from] = row
            if prob_sum != 0:
                for state_from in self._states:
                    for state_to in self._states:
                        table[state_from][state_to] /= prob_sum
            xi.append(table)

        states_number = len(self._states)
        symbols_number = len(self._symbols)
        for state in self._states:
            # update start probability (1.0 keeps the division a true
            # division on Python 2 when every operand is an int)
            self._start_prob[state] = \
                (smoothing + gamma[0][state]) / \
                (1.0 + states_number * smoothing)

            # update transition probability
            gamma_sum = sum(
                gamma[index][state] for index in range(length - 1))
            denominator = gamma_sum + states_number * smoothing
            # Testing the smoothed denominator (not the raw count) lets a
            # state with no expected visits fall back to the smoothed,
            # uniform row instead of an all-zero one.
            if denominator > 0:
                denominator = float(denominator)
                for state_to in self._states:
                    xi_sum = sum(
                        xi[index][state][state_to]
                        for index in range(length - 1))
                    self._trans_prob[state][state_to] = \
                        (smoothing + xi_sum) / denominator
            else:
                for state_to in self._states:
                    self._trans_prob[state][state_to] = 0.0

            # update emission probability
            gamma_sum += gamma[length - 1][state]
            emit_gamma_sum = dict.fromkeys(self._symbols, 0.0)
            for index in range(length):
                emit_gamma_sum[sequence[index]] += gamma[index][state]

            denominator = gamma_sum + symbols_number * smoothing
            if denominator > 0:
                denominator = float(denominator)
                for symbol in self._symbols:
                    self._emit_prob[state][symbol] = \
                        (smoothing + emit_gamma_sum[symbol]) / denominator
            else:
                for symbol in self._symbols:
                    self._emit_prob[state][symbol] = 0.0
建立 HMM 模型,并初始化状态集合、观测符号集合、初始概率、转移矩阵和混淆矩阵(发射概率)
# Build the three-box / two-colour example model.
import hmm

# Hidden states (the boxes) and observable symbols (the ball colours).
states = ('box1', 'box2', 'box3')
symbols = ('red', 'white')

# Initial state distribution.
start_prob = {
    'box1': 0.2,
    'box2': 0.4,
    'box3': 0.4,
}

# Transition matrix: trans_prob[state_from][state_to].
trans_prob = {
    'box1': {'box1': 0.5, 'box2': 0.2, 'box3': 0.3},
    'box2': {'box1': 0.3, 'box2': 0.5, 'box3': 0.2},
    'box3': {'box1': 0.2, 'box2': 0.3, 'box3': 0.5},
}

# Emission (confusion) matrix: emit_prob[state][symbol].
emit_prob = {
    'box1': {'red': 0.5, 'white': 0.5},
    'box2': {'red': 0.4, 'white': 0.6},
    'box3': {'red': 0.7, 'white': 0.3},
}

# Observation sequence used by the snippets that follow.
sequence = ['red', 'white', 'red', 'white']

model = hmm.Model(states, symbols, start_prob, trans_prob, emit_prob)
前向算法(计算观测序列的概率)
# Probability of the observation sequence, computed by the forward algorithm.
# The parenthesized form works as a statement on Python 2 and a call on Python 3.
print(model.evaluate(sequence))
后向算法(用后向概率计算同一观测序列的概率,结果应与前向算法一致)
# Recompute the sequence probability from the backward variables:
# sum over states of start_prob * emit_prob(first symbol) * beta[0].
beta = model.backward(sequence)
prob = 0
for state in beta[0]:
    # Use the model's own (normalized) start probability rather than the
    # raw input dict, so the value matches what the model computes with.
    prob += beta[0][state] * model.emit_prob(state, sequence[0]) * \
        model.start_prob(state)
print(prob)
维特比算法
# Most probable hidden state path for the observation sequence (Viterbi).
# The parenthesized form works as a statement on Python 2 and a call on Python 3.
print(model.decode(sequence))
阅读全文
0 0
- hmm 算法(1)
- HMM 算法随记(1)
- HMM算法
- HMM之前向算法(forward algorithms)
- HMM最大匹配分词算法(Python)
- HMM--隐马尔科夫模型 (1)
- HMM学习笔记(1)
- 隐马尔科夫模型HMM(1)
- HMM之前向算法
- HMM算法学习
- HMM之前向算法
- Baumwelch 算法训练HMM
- 【机器学习】HMM算法
- 机器学习算法-HMM
- HMM的viterbi算法
- HMM-鲍姆-韦尔奇算法
- hmm-前向算法
- hmm-DTW算法
- 复选框的全选和反选
- Java之final修饰符
- 深入探索Java 8 Lambda表达式
- fragstats v3.3绿色版
- 南邮计算机科学与技术专业本科2014级课程设计II题目
- hmm 算法(1)
- 用MR做相似度分析
- 进程监控supervisor
- TCP\IP 数据流与数据包
- 原码,反码,补码
- java script特效
- IntelliJ IDEA下的使用git
- Bloom Filter算法及应用
- 英语口语必备900句