Mex文件编写实例: entropy used in mutual information

来源:互联网 发布:常用协议端口有多少 编辑:程序博客网 时间:2024/06/09 23:22

Jianxin Wu在他的文章《Compact Representation for Image Classification: To Choose or to Compress?》中提出了一种用于特征筛选的方法,即互信息(Mutual Information)法。


其Publication主页  http://cs.nju.edu.cn/wujx/publication.htm 上给出了具体的Matlab实现代码和论文PDF。


博主由于最近需要利用MI方法来筛选特征,而Matlab版本的代码运行速度太慢,故特意编写了C++的mex代码,并给出了Matlab中对应位置的注释。经验证无错误。


Jianxin Wu编写的Matlab代码如下:

(本博客系原创,转载请注明出处:http://blog.csdn.net/xuexiyanjiusheng/article/details/46882447)


function [entX,entXY] = entropy(dinsts, labels, bins)% Written by Jianxin Wu (Jianxin Wu, wujx2001@gmail.com) % % Given the data in 'dinsts', where each row is one example % and the 'labels' (integers 1, 2, ..., C, where C is the number of categories)% One parameter 'bins' is needed: % case '-2': quantize dinst into 2-bits, then compute MI % case '-1': quantize dinst into 1-bit, then compute MI % else: be a positive integer, uniform quantization % Output: % entX: the entropy of each feature dimension % entXY: the mutual information between each dimension and the labels % % You may want to change the thresholds for bins = -1 (1-bit) or -2 (2-bits) % e.g., if your data is non-negative, current threshold values in X will not work % they are designed for FV / VLAD features (which are [-1 +1]) % % Even if your data are [-1 +1], you may want to change the threshold +/- 0.0125 to % a value that fits your dataset, e.g., 0.1- and 0.9-quantile分位数  [~, dim] = size(dinsts);%number of columns nr_class = max(labels); % we assume labels are 1, 2, 3, ... if bins == -2 % this is the case for quantized 2-bit version     X = [-1 -0.0125 0 0.0125 1]; elseif bins == -1 % 1-bit version     X = [-1 0 1]; else    vmin = min(min(dinsts));    vmax = max(max(dinsts));     X = vmin:((vmax-vmin)/bins):vmax+1e-6; endbins = length(X)-1; probX = zeros(bins,dim); probXY = zeros(bins*nr_class,dim); for i = 1:dim     if mod(i,1000)==0         fprintf('.');     end    if mod(i,100000)==0         fprintf('\n');     end        temp = dinsts(:,i);%i-th column    frequency = histc(temp, X);     frequency = frequency / sum(frequency);     probX(:,i) = frequency(1:end-1);     this = zeros(1,nr_class*bins);         for j = 1:nr_class         frequency = histc(temp(labels==j), X);         this((j-1)*bins+1:j*bins) = frequency(1:end-1);     end    this = this / sum(this);     probXY(:,i) = this; end%fprintf('\n');entX = -sum(probX.*log2(probX+1.0e-6)); entXY = -sum(probXY.*log2(probXY+1.0e-6));


博主编写的对应C++代码如下:


//written by Panfeng Li(Panfeng Li,panfengli@hust.edu.en)//This code is just used for study#include <math.h>#include <string.h>#include "mex.h"void histc_c(double *input, double *edges, int len_input, int bins, double *outfreq, int *count)//just deal with one dimension{int i, j;for (i = 0; i<len_input; i++)//i-th input{for (j = 0; j< bins; j++){if (input[i] >= edges[j] && input[i]<edges[j + 1]){outfreq[j]++;(*count)++;}}if (input[i] == edges[bins]){outfreq[bins]++;(*count)++;}}}static void ent(double *dinsts, double *labels, double *X, int M, int N, int bins, int nr_class, double *entX, double *entXY){//dinsts([M N])//probX = zeros(bins,dim); //probXY = zeros(bins*nr_class,dim);double*temp, *temp_;double *frequency;double *this_;int i, j, k;int count, count_;double tem, tem_;double *probX, *probXY;temp = new double[M];//memory one column of dinststemp_ = new double[M];frequency = new double[bins + 1];//bins = length(X)-1this_ = new double[nr_class*bins];//nr_class = max(labels)//this = zeros(1,nr_class*bins);probX = new double[bins*N];probXY = new double[bins*nr_class * N];memset(probX, 0, (bins*N)*sizeof(double));memset(probXY, 0, (bins*nr_class * N)*sizeof(double));//calculate probX,probXYfor (i = 0; i < N; i++)//i-th column of dinsts{//calculate probXmemset(frequency, 0, (bins + 1)*sizeof(double));//each column gets its frequencyfor (j = 0; j < M; j++)//j-th row of dinsts{temp[j] = dinsts[j + i * M];//temp = dinsts(:,i)}count = 0;histc_c(temp, X, M, bins, frequency, &count);//frequency = histc(temp, X); for (j = 0; j < bins + 1; j++){//frequency[j] = frequency[j] / (count + 1.0e-6);//frequency = frequency / sum(frequency); frequency[j] = frequency[j] / (count );}//probX = zeros(bins,dim); for (j = 0; j < bins; j++){probX[j + i * bins] = frequency[j];//probX(:,i) = frequency(1:end-1);}//calculate probXYmemset(this_, 0, (nr_class*bins)*sizeof(double));//this = zeros(1,nr_class*bins); tem = 0;tem_ = 0;for (j = 1; j <= nr_class; j++)//for j = 1:nr_class{count_ = 0;//for temp_memset(frequency, 0, (bins + 1)*sizeof(double));for (k = 0; k < M; k++){if (labels[k] == j)//labels = 1 or 2{temp_[count_] = temp[k];//temp(labels==j)count_++;}}count = 0;//for frequencyhistc_c(temp_, X, count_, bins, frequency, &count);//frequency = histc(temp(labels==j), X);for (k = (j - 1)*bins; k < j*bins; k++){this_[k] = frequency[k - (j - 1)*bins];//this((j-1)*bins+1:j*bins) = frequency(1:end-1); //!!!note that 2+1:4 = 3:4;tem += this_[k];tem_++;}}for (j = 0; j < tem_; j++){//this_[j] = this_[j] / (tem + 1.0e-6);//this = this / sum(this); this_[j] = this_[j] / (tem);}//probXY = zeros(bins*nr_class,dim)for (j = 0; j < bins*nr_class; j++){probXY[j + i * (bins*nr_class)] = this_[j];//probXY(:,i) = this; }}//calculate entX,entXYfor (i = 0; i < N; i++){entX[i] = - probX[i * 2] * (log(probX[i * 2] + 1.0e-6) / log(2.0)) - probX[i * 2 + 1] * (log(probX[i * 2 + 1] + 1.0e-6) / log(2.0));//entX = -sum(probX.*log2(probX + 1.0e-6))entXY[i] = - probXY[i * 4] * (log(probXY[i * 4] + 1.0e-6) / log(2.0)) - probXY[i * 4 + 1] * (log(probXY[i * 4 + 1] + 1.0e-6) / log(2.0)) - probXY[i * 4 + 2] * (log(probXY[i * 4 + 2] + 1.0e-6) / log(2.0)) - probXY[i * 4 + 3] * (log(probXY[i * 4 + 3] + 1.0e-6) / log(2.0));//entXY = -sum(probXY.*log2(probXY + 1.0e-6))}delete[] temp;delete[] temp_;delete[] frequency;delete[] this_;delete[] probX;delete[] probXY;}//dinsts, labels, X, row, dim, bins, nr_class          void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray*prhs[]){double *dinsts;double *labels;double *X;int M, N, bins, nr_class;double *entX, *entXY;dinsts = mxGetPr(prhs[0]);labels = mxGetPr(prhs[1]);X = mxGetPr(prhs[2]);M = (int)mxGetScalar(prhs[3]);N = (int)mxGetScalar(prhs[4]);bins = (int)mxGetScalar(prhs[5]);nr_class = (int)mxGetScalar(prhs[6]);//entX = [1,dim]; //entXY = [1,dim];plhs[0] = mxCreateDoubleMatrix(1,(mwSize)N, mxREAL);plhs[1] = mxCreateDoubleMatrix(1,(mwSize)N, mxREAL);entX = mxGetPr(plhs[0]);entXY = mxGetPr(plhs[1]);ent(dinsts, labels, X, M, N, bins, nr_class, entX, entXY);}

直接mex ent.cpp就可以使用了。只需要注意输进来的参数要设置好。


0 0
原创粉丝点击