文章标题

来源:互联网 发布:日本电影翻译软件 编辑:程序博客网 时间:2024/06/18 17:04
%%% Compare five classifiers (logistic regression, SVM, CART, random forest,
%%% AdaBoost) on the diabetes CSV using Sfold-fold cross-validation.
%%% Result: acc(k,m) holds the accuracy of method m on fold k; the column
%%% means of acc are displayed at the end.
clear

%%% Read the CSV file.
% csvread offsets are zero-based: (2,1) starts reading at the third line and
% the second column of the file.
% NOTE(review): if the file has a single header line, row offset 2 also drops
% the first data record (offset 1 would keep it), and column offset 1 drops
% the file's first column -- confirm both are intended.
M = csvread('C:\Users\Administrator\Desktop\classification\pima-indians-diabetes-database\diabetes.csv',2,1);

Sfold = 4;               % number of cross-validation folds
acc = zeros(Sfold,5);    % accuracy of each method (columns) on each fold (rows)

%%% Missing values: a 0 in columns 2..7 is treated as "missing" and replaced
%%% by the mean of the non-zero entries of that column.
% This is done once, before the folds are formed: after the first pass no
% zeros remain, so repeating it inside the loop (as the original did) was
% redundant work.
% (Alternative considered by the author: delete every row with a missing value.)
% NOTE(review): the means are computed over all rows, so test rows influence
% the imputed training values -- confirm this leakage is acceptable.
% NOTE(review): the original comment speaks of "columns 2 to 6" while the code
% covers 2:7 of M (whose first file column was already skipped) -- confirm the
% intended column range.
impute_cols = 2:7;
for j = impute_cols
    is_missing = (M(:,j) == 0);
    if any(is_missing)
        M(is_missing,j) = mean(M(~is_missing,j));
    end
end

%%% Fold boundaries, computed once from the full data set.
% Fold k covers rows fold_edges(k)+1 .. fold_edges(k+1). The original deleted
% each test fold from M inside the loop and then derived the next boundary
% from the shrunken matrix, which produced unequal folds, an ever smaller
% training set, and left part of the data untested.
num_rows = size(M,1);
fold_edges = round((0:Sfold)*num_rows/Sfold);

for fold = 1:Sfold
    % Split into test (the current fold) and training (all other rows)
    % without modifying M.
    is_test = false(num_rows,1);
    is_test(fold_edges(fold)+1:fold_edges(fold+1)) = true;

    test_data   = M(is_test,1:7);
    test_label  = M(is_test,8);
    train_data  = M(~is_test,1:7);
    train_label = M(~is_test,8);

    %%% Logistic regression: round the predicted probability to a 0/1 label.
    factor = glmfit(train_data,train_label,'binomial','link','logit');
    logitFit = round(glmval(factor,test_data,'logit'));
    accuracy_lr = mean(logitFit == test_label);
    acc(fold,1) = accuracy_lr;

    %%% SVM
    % NOTE(review): svmtrain/svmclassify are legacy functions that newer
    % Statistics and Machine Learning Toolbox releases no longer ship
    % (fitcsvm/predict are the replacements) -- confirm the target version.
    factor_svm = svmtrain(train_data,train_label);
    svm_label = svmclassify(factor_svm,test_data);
    accuracy_svm = mean(svm_label == test_label);
    acc(fold,2) = accuracy_svm;

    %%% CART, pruned to level 2.
    % eval returns the predicted class names as a cell array of strings;
    % str2double converts the whole cell array to numbers in one call.
    factor_cart = classregtree(train_data,train_label,'method','classification');
    pruned_tree = prune(factor_cart,'level',2);
    cart_label_result = str2double(eval(pruned_tree,test_data));
    accuracy_cart = mean(cart_label_result == test_label);
    acc(fold,3) = accuracy_cart;

    %%% Random forest with 500 trees (predict also returns a cell array of
    %%% class-name strings).
    factor_rf = TreeBagger(500,train_data,train_label);
    rf_label_result = str2double(predict(factor_rf,test_data));
    accuracy_rf = mean(rf_label_result == test_label);
    acc(fold,4) = accuracy_rf;

    %%% Boosted trees: AdaBoostM1 with 100 tree learners.
    Ensemble_factor = fitensemble(train_data,train_label,'AdaBoostM1',100,'Tree');
    Ensemble_label = predict(Ensemble_factor,test_data);
    accuracy_Ensemble = mean(Ensemble_label == test_label);
    acc(fold,5) = accuracy_Ensemble;
end

mean(acc)  % display the mean accuracy of each method over the folds
0 0
原创粉丝点击