Logistic 回归图像分类器
来源:互联网 发布:键盘侠网络暴力的危害 编辑:程序博客网 时间:2024/05/20 05:47
参考文献:http://www.deeplearning.net/tutorial/logreg.html
#coding=utf-8
from __future__ import print_function
__docformat__ = 'restructedtext en'
import six.moves.cPickle as pickle
import gzip
import os
import sys
import timeit
import numpy
import theano
import theano.tensor as T
class LogisticRegression(object):
    """Multi-class logistic regression (softmax) classifier.

    The model consists of a weight matrix ``W`` and a bias vector ``b``;
    class probabilities are computed as ``softmax(dot(input, W) + b)``.
    """

    def __init__(self, input, n_in, n_out):
        """Create the shared parameters and the symbolic model outputs.

        :param input: symbolic variable holding the input data; it is
            multiplied by ``W`` and reduced with ``argmax(axis=1)``, so it
            is presumably a matrix with one example per row (confirm
            against the caller)
        :param n_in: number of input units (rows of ``W``)
        :param n_out: number of output units (columns of ``W`` and length
            of ``b``)
        """
        # Weights: an (n_in, n_out) matrix initialised to zeros.
        self.W = theano.shared(
            value=numpy.zeros(
                (n_in, n_out),
                dtype=theano.config.floatX
            ),
            name='W',
            borrow=True
        )
        # Biases: a vector of n_out zeros.
        self.b = theano.shared(
            value=numpy.zeros(
                (n_out,),
                dtype=theano.config.floatX
            ),
            name='b',
            borrow=True
        )

        # Probability of each class for each input row.
        self.p_y_given_x = T.nnet.softmax(T.dot(input, self.W) + self.b)

        # Predicted class: index of the highest probability in each row.
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

        # Parameters of the model.
        self.params = [self.W, self.b]

        # Keep a handle on the model input so predictor functions can be
        # compiled from a stored classifier.
        self.input = input

    def negative_log_likelihood(self, y):
        """Return the mean negative log-likelihood of the labels ``y``.

        :param y: vector giving the correct label of each example
        :returns: symbolic expression ``-mean(log p(y[i] | x[i]))``
        """
        # Indexing with [arange(n), y] selects entry (i, y[i]) of every
        # row i, i.e. the log-probability given to the correct label.
        return -T.mean(T.log(self.p_y_given_x)[T.arange(y.shape[0]), y])

    def errors(self, y):
        """Return the fraction of examples that are misclassified.

        :param y: vector giving the correct label of each example
        :returns: symbolic mean of the 0/1 mismatch vector between
            ``self.y_pred`` and ``y``
        :raises TypeError: if ``y`` and ``self.y_pred`` differ in ``ndim``
        :raises NotImplementedError: if ``y.dtype`` is not an integer type
        """
        # y must have the same number of dimensions as the predictions.
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # Only integer labels are supported.
        if y.dtype.startswith('int'):
            # T.neq yields a vector of 0s and 1s, where 1 marks a
            # mistake in prediction.
            return T.mean(T.neq(self.y_pred, y))
        else:
            raise NotImplementedError()
#载入data数据
def load_data(dataset):
    """Load a pickled, gzipped dataset into Theano shared variables.

    :param dataset: path to the dataset file. A bare file name that does
        not exist in the working directory is looked up in ``../data``
        relative to this script; ``mnist.pkl.gz`` is downloaded when it
        cannot be found.
    :returns: list of three ``(x, y)`` pairs -- training, validation and
        test set -- where ``x`` is a shared variable and ``y`` is the
        shared label variable cast to ``int32``
    """
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check whether the dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        from six.moves import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        print('Downloading data from %s' % origin)
        urllib.request.urlretrieve(origin, dataset)

    print('... loading data')

    # NOTE(review): unpickling can execute arbitrary code, and this file
    # may have just been fetched over plain HTTP -- only use trusted data.
    with gzip.open(dataset, 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except TypeError:
            # Python 2's pickle.load() does not accept ``encoding``.
            train_set, valid_set, test_set = pickle.load(f)

    def shared_dataset(data_xy, borrow=True):
        """Wrap an ``(x, y)`` pair of arrays in Theano shared variables.

        Both arrays are stored as ``floatX``; the labels are cast to
        ``int32`` on the way out because they are used as indices.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """Train a logistic regression model with minibatch gradient descent.

    The classifier with the lowest validation error seen so far is
    pickled to ``best_model.pkl`` in the working directory.

    :param learning_rate: step size used in the gradient updates
    :param n_epochs: maximum number of passes over the training set
    :param dataset: path of the dataset file, passed to ``load_data``
    :param batch_size: number of examples per minibatch
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # Number of whole minibatches in each split (a remainder is dropped).
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    print('... building the model')

    # Index of the minibatch to process.
    index = T.lscalar()

    # Symbolic variables for one minibatch of inputs and labels.
    x = T.matrix('x')   # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as a 1D vector of int labels

    # Each MNIST image has size 28*28 and there are 10 classes.
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # The cost minimised during training: the negative log-likelihood.
    cost = classifier.negative_log_likelihood(y)

    # Error rate of the model on one minibatch of the test set.
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # Error rate of the model on one minibatch of the validation set.
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # Gradient of the cost with respect to theta = (W, b).
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # Parameter update rules: one plain gradient-descent step.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # `train_model` returns the cost on one training minibatch and, at the
    # same time, applies the rules defined in `updates`.
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size:(index + 1) * batch_size],
            y: train_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    print('... training the model')

    # Early-stopping parameters.
    patience = 5000  # minimum number of minibatch iterations to run
    patience_increase = 2  # factor by which patience grows on a new best
    improvement_threshold = 0.995  # relative improvement deemed significant
    # Number of minibatches between two validation passes: once per epoch,
    # or every patience // 2 iterations if that is smaller.
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in range(n_train_batches):

            # One training step on this minibatch.
            minibatch_avg_cost = train_model(minibatch_index)
            # Global iteration number, counted across epochs.
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                # Compute the zero-one loss on the validation set.
                validation_losses = [validate_model(i)
                                     for i in range(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # If this is the best validation score so far ...
                if this_validation_loss < best_validation_loss:
                    # ... extend patience when the improvement is large
                    # enough to count as significant.
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    best_validation_loss = this_validation_loss

                    # Evaluate the new best model on the test set.
                    test_losses = [test_model(i)
                                   for i in range(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        (
                            ' epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

                    # Save the best model.
                    with open('best_model.pkl', 'wb') as f:
                        pickle.dump(classifier, f)

            # Stop once the iteration count has used up the patience.
            if patience <= iteration:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%,'
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print('The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time)))
    print(('The code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.1fs' % ((end_time - start_time))), file=sys.stderr)
def predict():
    """
    An example of how to load a trained model and use it
    to predict labels.

    Reads ``best_model.pkl`` from the working directory and prints the
    predicted labels of the first 10 test-set examples.
    """
    # Load the saved model. The file was written in binary mode, so it
    # must be read in binary mode too; ``with`` closes it afterwards.
    # NOTE(review): unpickling can execute arbitrary code -- only load
    # model files you trust.
    with open('best_model.pkl', 'rb') as f:
        classifier = pickle.load(f)

    # Compile a predictor function.
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    # Test it on some examples from the test set.
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()

    predicted_values = predict_model(test_set_x[:10])
    print("Predicted values for the first 10 examples in test set:")
    print(predicted_values)
def test1():
    """Classify the image ``x0.png`` with the saved model and print the result.

    Reads ``best_model.pkl`` and ``x0.png`` from the working directory.
    """
    import matplotlib.pyplot as plt

    # The model was written in binary mode, so read it in binary mode;
    # ``with`` closes the file afterwards.
    # NOTE(review): unpickling can execute arbitrary code -- only load
    # model files you trust.
    with open('best_model.pkl', 'rb') as f:
        classifier = pickle.load(f)

    imageTest = plt.imread('x0.png')  # a photo taken by the author
    predict_model = theano.function(inputs=[classifier.input],
                                    outputs=classifier.y_pred)
    # Flatten to a single row of 28*28 values.
    # NOTE(review): assumes a single-channel 28x28 image -- confirm.
    imageTest = imageTest.reshape(1, 28 * 28)
    # The digit was written in black on a white background, so invert it
    # to white on black.
    imageTest = 1. - imageTest
    predicted_values = predict_model(imageTest)
    print(predicted_values)
if __name__ == '__main__':
    # Train the model first, then classify the custom image with the
    # best model that training saved.
    sgd_optimization_mnist()
    test1()
- logistics图像分类器
- logistics回归分类图片
- 零售户logistics回归分类
- 零售户logistics回归分类
- Python Multinomial Logistics 实现MNIST分类
- 关于Logistics回归用于分类的解释
- SparkML之分类(二)logistics回归
- 使用sklearn 实现 Logistics Regression 分类
- 图像分类器(KNN)
- logistics回归--梯度上升算法以及改进--用于二分类
- 分类问题:logistics Regression的方法及步骤
- 图像分类
- 图像分类基本流程及 KNN 分类器
- 白话——胡说图像分类器
- haarcascade 分类器(视频图像皆可)
- tensorflow练习9:图像分类器
- Logistics Rrgression(Logistics回归)
- 关于少量数据的svm、svr和logistics的分类比较
- struts2中的action中实现了validate方法,struts.xml中的action配置了通配符,不能自动跳转到input
- com.alibaba.dubbo.rpc.RpcException: Failed to invoke the method 错误处理
- 设计模式学习
- 计蒜客 简单图论(并查集 + 排序 + 枚举)
- sys/class文件节点的制作
- logistics图像分类器
- 前端编写第一个后台接口
- Alsa音频编程
- 百度云获取下载链接代码。
- error C2065: “servAddr”: 未声明的标识符__错误记录
- The Eclipse executable launcher was unable to locate its companion shared library
- EditText+listview+数据库操作实现搜索功能
- 有哪些比较好看的纪录片?
- mybatis 保存后返回保存成功后的id