车牌识别系统开发记录(三) 字符识别

Neural Networks【OpenCV Documentation】







/**
 * Builds the feature row vector for a single character image.
 *
 * The result is the concatenation of three parts, in this order:
 *   1. the projection histogram computed with the VERTICAL flag,
 *   2. the projection histogram computed with the HORIZONTAL flag,
 *   3. the pixel values of the image resized to sizeData x sizeData.
 *
 * @param in        Character image; its pixels are read as unsigned char.
 * @param sizeData  Side length of the down-sampled square image.
 * @return          A 1 x N CV_32F matrix holding the concatenated features.
 */
Mat OCR::features(Mat in, int sizeData){
    // Projection histograms along both axes.
    Mat vertHist = ProjectedHistogram(in, VERTICAL);
    Mat horzHist = ProjectedHistogram(in, HORIZONTAL);

    // Low-resolution copy of the character.
    Mat lowData;
    resize(in, lowData, Size(sizeData, sizeData));

    // Total feature dimension: both histograms plus every low-res pixel.
    const int pixelCount = lowData.cols * lowData.cols;
    const int numCols = vertHist.cols + horzHist.cols + pixelCount;

    Mat out = Mat::zeros(1, numCols, CV_32F);

    // Running write position inside the output row.
    int cursor = 0;

    for (int i = 0; i < vertHist.cols; ++i)
        out.at<float>(cursor++) = vertHist.at<float>(i);

    for (int i = 0; i < horzHist.cols; ++i)
        out.at<float>(cursor++) = horzHist.at<float>(i);

    // The outer counter is passed as the first index of at() and the inner
    // one as the second. The resized image is square, so both stay in range;
    // the order is kept because it determines the feature layout.
    for (int x = 0; x < lowData.cols; ++x) {
        for (int y = 0; y < lowData.rows; ++y) {
            out.at<float>(cursor++) = (float)lowData.at<unsigned char>(x, y);
        }
    }

    if (DEBUG)
        cout << out << "\n===========================================\n";

    return out;
}
/**
 * Counts the non-zero pixels of every row or every column of an image and
 * scales the counts by their maximum.
 *
 * @param img  Image to project.
 * @param t    Non-zero: one bin per image row. Zero: one bin per image column.
 * @return     A 1 x bins CV_32F matrix. When the largest count is positive
 *             the values are divided by it; otherwise they are left as-is.
 */
Mat OCR::ProjectedHistogram(Mat img, int t){
    const bool perRow = (t != 0);
    const int bins = perRow ? img.rows : img.cols;

    Mat mhist = Mat::zeros(1, bins, CV_32F);

    for (int idx = 0; idx < bins; ++idx) {
        Mat line;
        if (perRow)
            line = img.row(idx);
        else
            line = img.col(idx);
        mhist.at<float>(idx) = countNonZero(line);
    }

    // Normalise by the peak so the largest bin becomes 1.
    double lowest, highest;
    minMaxLoc(mhist, &lowest, &highest);

    if (highest > 0)
        mhist.convertTo(mhist, -1, 1.0f/highest, 0);

    return mhist;
}




/**
 * Splits a plate image into candidate character segments.
 *
 * The plate image is binarised with an inverted threshold, external contours
 * are extracted, and each contour's bounding box is tested with
 * verifySizes(). Accepted regions are run through preprocessChar() and
 * returned together with their bounding rectangle.
 *
 * @param plate  Plate whose plateImg member is segmented.
 * @return       Accepted character segments, in contour order.
 */
vector<CharSegment> OCR::segment(Plate plate){
    Mat input = plate.plateImg;
    vector<CharSegment> output;

    // Inverted binary threshold of the plate image.
    Mat img_threshold;
    threshold(input, img_threshold, 60, 255, CV_THRESH_BINARY_INV);
    if (DEBUG)
        imshow("Threshold plate", img_threshold);

    // Contours are searched on a copy so the thresholded image stays intact.
    Mat img_contours;
    img_threshold.copyTo(img_contours);

    // Look for contours of possible characters inside the plate region.
    vector< vector<Point> > contours;
    findContours(img_contours, contours, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_NONE);

    // Colour canvas used for the debug visualisation.
    cv::Mat result;
    img_threshold.copyTo(result);
    cvtColor(result, result, CV_GRAY2RGB);
    cv::drawContours(result, contours, -1, cv::Scalar(255,0,0), 1);

    for (size_t c = 0; c < contours.size(); ++c) {
        Rect box = boundingRect(Mat(contours[c]));
        rectangle(result, box, Scalar(0,255,0));

        Mat candidate(img_threshold, box);
        if (!verifySizes(candidate))
            continue;

        candidate = preprocessChar(candidate);
        output.push_back(CharSegment(candidate, box));
        // Re-draw accepted boxes in a second colour.
        rectangle(result, box, Scalar(0,125,255));
    }

    if (DEBUG) {
        cout << "Num chars: " << output.size() << "\n";
        imshow("SEgmented Chars", result);
    }

    return output;
}




// 这个函数主要是对输入图片归一化到统一的大小20×20Mat OCR::preprocessChar(Mat in){    int h=in.rows;    int w=in.cols;    Mat transformMat=Mat::eye(2,3,CV_32F);    int m=max(w,h);    transformMat.at<float>(0,2)=m/2 - w/2;    transformMat.at<float>(1,2)=m/2 - h/2;    Mat warpImage(m,m, in.type());    warpAffine(in, warpImage, transformMat, warpImage.size(), INTER_LINEAR, BORDER_CONSTANT, Scalar(0) );    Mat out;    resize(warpImage, out, Size(charSize, charSize) );     return out;}


The whole trained network works as follows:

  1. Take the feature vector as input. The vector size is equal to the size of the input layer.
  2. Pass values as input to the first hidden layer.
  3. Compute outputs of the hidden layer using the weights and the activation functions.
  4. Pass outputs further downstream until you compute the output layer.
So, to compute the network, you need to know all the weights w^{(n+1)}_{i,j}. The weights are computed by the training algorithm. The algorithm takes a training set, multiple input vectors with the corresponding output vectors, and iteratively adjusts the weights to enable the network to give the desired response to the provided input vectors.

/**
 * Trains the member network `ann` as a three-layer perceptron.
 *
 * Layer sizes are: TrainData.cols inputs, nlayers hidden units and
 * numCharacters outputs. Class labels are expanded into one row per sample
 * with a 1 in the column of the sample's class and 0 elsewhere.
 *
 * @param TrainData  One feature row per training sample.
 * @param classes    Class index of each sample, read as int.
 * @param nlayers    Number of units in the hidden layer.
 */
void OCR::train(Mat TrainData, Mat classes, int nlayers){
    // Network topology: input, hidden, output.
    Mat layers(1, 3, CV_32SC1);
    layers.at<int>(0) = TrainData.cols;
    layers.at<int>(1) = nlayers;
    layers.at<int>(2) = numCharacters;
    ann.create(layers, CvANN_MLP::SIGMOID_SYM, 1, 1);

    // Build the target matrix: n samples by m classes.
    Mat trainClasses;
    trainClasses.create(TrainData.rows, numCharacters, CV_32FC1);
    for (int sample = 0; sample < trainClasses.rows; ++sample) {
        float* target = trainClasses.ptr<float>(sample);
        for (int cls = 0; cls < trainClasses.cols; ++cls) {
            // 1 where the column matches the sample's class, 0 otherwise.
            target[cls] = (cls == classes.at<int>(sample)) ? 1.0f : 0.0f;
        }
    }

    // Every sample gets the same weight.
    Mat weights(1, TrainData.rows, CV_32FC1, Scalar::all(1));

    // Learn the classifier.
    ann.train(TrainData, trainClasses, weights);
    trained = true;
}

/**
 * Runs the network on one feature vector and returns the winning class.
 *
 * @param f  Feature row vector to classify.
 * @return   Column index of the largest network output.
 */
int OCR::classify(Mat f){
    Mat output(1, numCharacters, CV_32FC1);
    ann.predict(f, output);

    // The x coordinate (column) of the maximum response is the class index.
    Point best;
    double bestVal;
    minMaxLoc(output, 0, &bestVal, 0, &best);

    return best.x;
}


