通过训练集预测大学申请结果

来源：互联网　发布：房地产行业各公司知乎　编辑：程序博客网　时间：2024/04/27 18:43
通过读取训练集建模后预测大学生入学申请的申请结果。
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/opencv.hpp>

#include <math.h>
#include <stdlib.h>

#include <cmath>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

using namespace cv;
using namespace std;

// Logistic regression on two exam grades, trained with batch gradient descent.
//
// A Point3d is used as a plain 3-tuple throughout:
//   - sample: (x, y) are the two features, z is the label (0 or 1, or -1 for
//     an unlabeled test sample);
//   - theta:  x is the intercept, y and z are the weights of the two features.

// Gradient-descent learning rate.
const double alpha = 0.01;

// Number of gradient-descent updates performed by main().
static const int kMaxIterations = 500;

// Raw grades are divided by this factor before being fed to the model, so the
// linear term stays small enough for exp() to remain well inside double range.
static const double kFeatureScale = 10.0;

// Natural log of 10, used to report costs on the base-10 log scale.
static const double kLn10 = std::log(10.0);

// Splits `line` on any character in `delims` and converts every non-empty
// field with atof. Empty fields (consecutive delimiters) are dropped, which
// mirrors strtok's tokenization rules.
static vector<double> parseFields(const string& line, const char* delims) {
    vector<double> values;
    string::size_type begin = line.find_first_not_of(delims);
    while (begin != string::npos) {
        const string::size_type end = line.find_first_of(delims, begin);
        values.push_back(atof(line.substr(begin, end - begin).c_str()));
        begin = line.find_first_not_of(delims, end);
    }
    return values;
}

// Reads the training set.
//
// Every line is expected to hold three comma-separated numbers:
// grade1,grade2,label. Both grades are divided by kFeatureScale. Lines with
// fewer than three fields (for example a trailing blank line) are skipped.
// Returns an empty vector, after printing a message, if the file cannot be
// opened.
vector<Point3d> getTraining(const string& trainingPath) {
    vector<Point3d> result;
    ifstream fileIn(trainingPath, ios_base::in);
    if (!fileIn.is_open()) {
        cout << "读取文件失败" << endl;
        return result;
    }

    string line;
    while (getline(fileIn, line)) {
        const vector<double> num = parseFields(line, ",");
        if (num.size() < 3) {
            continue;
        }
        result.push_back(Point3d(num[0] / kFeatureScale, num[1] / kFeatureScale, num[2]));
    }
    return result;
}

// Reads the testing set.
//
// Every line is split on tabs; the FIRST and THIRD non-empty fields become
// the two grades and the second field is ignored, exactly as the original
// parser did. NOTE(review): the layout of the testing file is not visible
// here - confirm that skipping the second field is intended.
// Grades are stored unscaled and the label is set to -1 (unknown). Lines with
// fewer than three fields are skipped. Returns an empty vector, after printing
// a message, if the file cannot be opened.
vector<Point3d> getTesting(const string& testingPath) {
    vector<Point3d> result;
    ifstream fileIn(testingPath, ios_base::in);
    if (!fileIn.is_open()) {
        cout << "读取文件失败" << endl;
        return result;
    }

    string line;
    while (getline(fileIn, line)) {
        const vector<double> num = parseFields(line, "\t");
        if (num.size() < 3) {
            continue;
        }
        result.push_back(Point3d(num[0], num[2], -1));
    }
    return result;
}

// Returns theta^T * X for one sample: theta.x + theta.y * x1 + theta.z * x2.
double getProduct(Point3d sample, Point3d theta) {
    return theta.x + theta.y * sample.x + theta.z * sample.y;
}

// Sigmoid / logistic function.
//
// Only ever exponentiates a non-positive value, so a large |z| cannot
// overflow exp() and turn the result into inf / inf = NaN.
double sigmoid(double z) {
    if (z >= 0) {
        return 1.0 / (1.0 + std::exp(-z));
    }
    const double e = std::exp(z);
    return e / (1.0 + e);
}

// Cost of a single sample: -log10(h) when the label is non-zero and
// -log10(1 - h) when the label is 0, where h = sigmoid(theta^T * X).
//
// Computed as softplus(t) / ln(10), with t = z for label 0 and t = -z
// otherwise. This is algebraically the same quantity, but stays finite when
// h saturates to exactly 0 or 1 in floating point.
double costFunction(Point3d sample, Point3d theta) {
    const double z = getProduct(sample, theta);
    const double t = (sample.z == 0) ? z : -z;
    const double softplus = (t > 0 ? t : 0.0) + std::log1p(std::exp(-std::fabs(t)));
    return softplus / kLn10;
}

// Mean cost J(theta) over all samples. `samples` must not be empty.
static double averageCost(const vector<Point3d>& samples, const Point3d& theta) {
    double total = 0;
    for (const Point3d& sample : samples) {
        total += costFunction(sample, theta);
    }
    return total / samples.size();
}

// Performs one batch gradient-descent step and returns the updated theta.
//
// The step for each parameter is alpha * sum((h - label) * feature). The
// intercept's feature is the constant 1, so its gradient is the plain sum of
// the errors.
Point3d getNewTheta(Point3d oldTheta, const vector<Point3d>& training) {
    double gradIntercept = 0;
    double gradFirst = 0;
    double gradSecond = 0;
    for (const Point3d& sample : training) {
        const double error = sigmoid(getProduct(sample, oldTheta)) - sample.z;
        gradIntercept += error;
        gradFirst += error * sample.x;
        gradSecond += error * sample.y;
    }
    return Point3d(oldTheta.x - alpha * gradIntercept,
                   oldTheta.y - alpha * gradFirst,
                   oldTheta.z - alpha * gradSecond);
}

// Classifies every training sample with finalTheta (threshold 0.5) and prints
// the fraction whose prediction equals its label.
void compareTraining(const vector<Point3d>& training, Point3d finalTheta) {
    int count = 0;
    for (const Point3d& sample : training) {
        const int predicted = sigmoid(getProduct(sample, finalTheta)) >= 0.5 ? 1 : 0;
        if (predicted == sample.z) {
            count++;
        }
    }
    cout << "自测准确率为 " << (double)count / training.size() << endl;
}

// Classifies every testing sample with finalTheta (threshold 0.5), prints one
// row per sample and then the totals of accepted / rejected applicants.
void compareTesting(const vector<Point3d>& testing, Point3d finalTheta) {
    int resultTrue = 0;
    int resultFalse = 0;
    cout << endl << "Num\t\tGrade1\t\tGrade2\t\tResult" << endl;
    for (size_t i = 0; i < testing.size(); i++) {
        // Testing grades are stored unscaled, so apply the training scale here.
        Point3d scaled;
        scaled.x = testing[i].x / kFeatureScale;
        scaled.y = testing[i].y / kFeatureScale;

        int y = 0;
        if (sigmoid(getProduct(scaled, finalTheta)) >= 0.5) {
            y = 1;
            resultTrue++;
        } else {
            resultFalse++;
        }
        cout << "testing " << i + 1 << "\t" << testing[i].x << "\t\t" << testing[i].y
             << "\t\t" << y << endl;
    }
    cout << endl
         << resultTrue << " students can enter the university" << endl
         << resultFalse << " students can't enter the university" << endl;
}

// Keeps the console window open on Windows; does nothing elsewhere, where the
// `pause` shell command does not exist.
static void pauseConsole() {
#ifdef _WIN32
    system("pause");
#endif
}

// Trains the model and reports the results.
//
// Usage: program [trainingPath [testingPath]]
// Without arguments the original hard-coded desktop paths are used.
int main(int argc, char** argv) {
    const string trainingPath =
        argc > 1 ? argv[1] : "C:\\Users\\Administrator\\Desktop\\LR_TrainingSet.txt";
    const string testingPath =
        argc > 2 ? argv[2] : "C:\\Users\\Administrator\\Desktop\\LR_TestingSet.txt";

    // Load the data sets.
    const vector<Point3d> training = getTraining(trainingPath);
    const vector<Point3d> testing = getTesting(testingPath);
    if (training.empty()) {
        // The mean cost and the accuracy would both be 0 / 0 without samples.
        cerr << "no training samples loaded from " << trainingPath << endl;
        pauseConsole();
        return 1;
    }

    // Build the model.
    Point3d theta(3, 11, 9);
    cout << "alpha = " << alpha << endl;
    cout << "initial_theta\t" << theta.x << "\t" << theta.y << "\t" << theta.z << endl;

    // Start from the initial theta so that it is reported if no later iterate
    // achieves a lower cost.
    Point3d finalTheta = theta;
    double minJ = averageCost(training, theta);
    for (int iteration = 0; iteration < kMaxIterations; iteration++) {
        theta = getNewTheta(theta, training);
        const double J = averageCost(training, theta);
        // Keep the theta with the smallest J(theta) seen so far.
        if (J < minJ) {
            minJ = J;
            finalTheta = theta;
        }
    }

    // Print the final theta.
    cout << "final_theta\t" << finalTheta.x << "  " << finalTheta.y << "  " << finalTheta.z
         << endl;

    // Accuracy on the training set itself.
    compareTraining(training, finalTheta);

    // Predictions for the testing set.
    compareTesting(testing, finalTheta);

    pauseConsole();
    return 0;
}
0 0