相似度(参数调节代码)

来源:互联网 发布:telnet本机ip端口不通 编辑:程序博客网 时间:2024/06/11 20:08
// test1.cpp : Defines the entry point for the console application.
//
// Experiment driver that compares several distance measures between labelled
// numeric series using a (sub-sampled) nearest-neighbour vote, and contains
// helpers for tuning the band width (WIN) and the cross-series weight (coe).

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <queue>
#include <string>
#include <vector>

using namespace std;

// One labelled record of a data file.
struct serie
{
    vector<double> vec;  // sample values in file order
    string label;        // first token of the record
    double len;          // sum of |vec[i] - vec[i-1]|, filled in by readFile
};

// Capacities of the statically allocated tables below.
const int MAX_SERIES = 10000;    // records that fit in `series`
const int MAX_LEN = 2000;        // side length of the DP tables
const int DATASET_COUNT = 38;    // records expected in numhelp.txt

double speed = 1.6;              // divisor applied to the window in find_win_size
double INF = 1e10;               // cost returned for cells outside the band
serie series[MAX_SERIES];
double dpA[MAX_LEN][MAX_LEN];    // layer A of the two-layer recurrences
double dpB[MAX_LEN][MAX_LEN];    // layer B of the two-layer recurrences
double dp[MAX_LEN][MAX_LEN];     // table used by getdistance1 / getdistance4
int num = 637;                   // samples per record of the file being processed
double coe = 0.2;                // weight applied to cross-series costs
int WIN = 0;                     // half-width of the band around the diagonal
double down_ratio;               // fraction of samples removed by down_sample
map<int, string> names;          // dataset index -> dataset name
vector<int> lens(DATASET_COUNT, 0);       // samples per record, per dataset
vector<int> num1(DATASET_COUNT, 0);       // record count used when train == false
vector<int> num2(DATASET_COUNT, 0);       // record count used when train == true
vector<int> classes(DATASET_COUNT, 0);    // used to derive the sub-sampling stride
vector<double> dev_mean(DATASET_COUNT, 0);// per-dataset value used to seed coe

// Formats an int as decimal text.
//
// Kept as a file-level helper for existing callers. Because the file also has
// `using namespace std;`, an unqualified call is ambiguous with
// std::to_string on C++11 and later, so callers in this file write
// `::to_string(...)`. The buffer is large enough for any 32-bit int including
// the sign (the previous 10-byte buffer was not).
string to_string(int value)
{
    char buffer[16];
    snprintf(buffer, sizeof(buffer), "%d", value);
    return string(buffer);
}

// Reads table[i][j] if the cell is inside the band |i - j| <= win or is one
// of the two extra border cells (0, win + 1) / (win + 1, 0); otherwise INF.
static double bandCell(double (*table)[MAX_LEN], int i, int j, int win)
{
    const bool extraBorder = (i == 0 && j == win + 1) || (j == 0 && i == win + 1);
    if (extraBorder || abs(i - j) <= win)
        return table[i][j];
    return INF;
}

// Band-limited read of dpA (see bandCell).
double DPA(int i, int j, int win)
{
    return bandCell(dpA, i, j, win);
}

// Band-limited read of dpB (see bandCell).
double DPB(int i, int j, int win)
{
    return bandCell(dpB, i, j, win);
}

// Euclidean distance between point i of series `one` and point j of series
// `two`, where each point occupies two consecutive values (2k, 2k + 1).
double getdis(int one, int two, int i, int j)
{
    const vector<double>& a = series[one].vec;
    const vector<double>& b = series[two].vec;
    const double d0 = a[i * 2] - b[j * 2];
    const double d1 = a[i * 2 + 1] - b[j * 2 + 1];
    return sqrt(d0 * d0 + d1 * d1);
}

// Cost of moving from element k-1 to element k inside one series.
typedef double (*StepCost)(int seriesIndex, int k);
// Cost of pairing element i of series `one` with element j of series `two`.
typedef double (*CrossCost)(int one, int i, int two, int j);

// |v[k] - v[k-1]| within one series.
static double scalarStep(int s, int k)
{
    return fabs(series[s].vec[k] - series[s].vec[k - 1]);
}

// |a[i] - b[j]| across two series.
static double scalarCross(int one, int i, int two, int j)
{
    return fabs(series[one].vec[i] - series[two].vec[j]);
}

// Point-to-point distance between consecutive points of one series.
static double planarStep(int s, int k)
{
    return getdis(s, s, k, k - 1);
}

// Point-to-point distance across two series.
static double planarCross(int one, int i, int two, int j)
{
    return getdis(one, two, i, j);
}

// Shared two-layer banded recurrence behind getdistance00/0t/0.
//
// dpA[i][j] is reached by advancing series `one` (from row i-1), dpB[i][j] by
// advancing series `two` (from column j-1). Staying in the same layer costs
// the within-series step; switching layers costs coe * cross. Steps of series
// `two` are additionally scaled by twoStepWeight. Uses the globals WIN, coe,
// dpA and dpB. n is one more than the number of elements considered.
static double layeredWarp(int one, int two, int n,
                          StepCost step, CrossCost cross, double twoStepWeight)
{
    if (n < 1)
        return 0;  // nothing to compare; avoids reading dpA[-1][-1]

    dpA[0][0] = 0;
    dpB[0][0] = 0;
    dpA[1][0] = 0;
    dpB[0][1] = 0;

    const int win = WIN;
    const int borderEnd = min(n - 1, win + 1);
    for (int k = 2; k <= borderEnd; ++k)
    {
        dpA[k][0] = dpA[k - 1][0] + step(one, k - 1);
        dpB[0][k] = dpB[0][k - 1] + twoStepWeight * step(two, k - 1);
    }

    for (int i = 1; i < n; ++i)
    {
        const int jBegin = max(1, i - win);
        const int jEnd = min(n, i + win + 1);
        for (int j = jBegin; j < jEnd; ++j)
        {
            const double match = coe * cross(one, i - 1, two, j - 1);

            // Row 1 has no previous element of `one`, so only the layer
            // switch is available there; likewise column 1 for `two`.
            double viaOne = DPB(i - 1, j, win) + match;
            if (i != 1)
                viaOne = min(DPA(i - 1, j, win) + step(one, i - 1), viaOne);
            dpA[i][j] = viaOne;

            double viaTwo = DPA(i, j - 1, win) + match;
            if (j != 1)
                viaTwo = min(viaTwo, DPB(i, j - 1, win) + twoStepWeight * step(two, j - 1));
            dpB[i][j] = viaTwo;
        }
    }
    return min(dpA[n - 1][n - 1], dpB[n - 1][n - 1]);
}

// Two-layer banded distance on series interpreted as 2-value points.
//
// Fix: the within-series step of `two` is now measured between points j-1 and
// j-2 of `two` (it previously mixed in index i-1), matching the scalar
// variants below.
// NOTE(review): n = (size + 1) / 2 means that for an even-length vector the
// last point is not visited -- confirm whether that is intended.
double getdistance00(int one, int two)
{
    const int n = (static_cast<int>(series[two].vec.size()) + 1) / 2;
    return layeredWarp(one, two, n, planarStep, planarCross, 1.0);
}

// Scalar two-layer banded distance in which steps inside series `two` are
// multiplied by 0, i.e. only steps of `one` and cross costs contribute.
double getdistance0t(int one, int two)
{
    const int n = static_cast<int>(series[two].vec.size()) + 1;
    return layeredWarp(one, two, n, scalarStep, scalarCross, 0.0);
}

// Scalar two-layer banded distance with both within-series steps at full
// weight and cross costs weighted by coe.
double getdistance0(int one, int two)
{
    const int n = static_cast<int>(series[two].vec.size()) + 1;
    return layeredWarp(one, two, n, scalarStep, scalarCross, 1.0);
}

// Band-limited warping distance with |a[i] - b[j]| as the cell cost and the
// usual three predecessors (up, left, diagonal), restricted to |i - j| <= WIN.
//
// Fixes: the border is initialised for indices 1..WIN (the old bound
// `i < min(i + win, n)` referred to the loop variable itself), and a cell that
// is on both band edges (WIN == 0) now only uses the diagonal instead of
// reading a cell outside the band. Results for WIN >= 1 are unchanged.
double getdistance1(int one, int two)
{
    const vector<double>& a = series[one].vec;
    const vector<double>& b = series[two].vec;
    const int n = static_cast<int>(b.size());
    if (n == 0)
        return 0;

    const int win = WIN;
    dp[0][0] = fabs(a[0] - b[0]);
    const int borderEnd = min(win + 1, n);
    for (int k = 1; k < borderEnd; ++k)
    {
        dp[k][0] = dp[k - 1][0] + fabs(a[k] - b[0]);
        dp[0][k] = dp[0][k - 1] + fabs(a[0] - b[k]);
    }

    for (int i = 1; i < n; ++i)
    {
        const int jEnd = min(i + win + 1, n);
        for (int j = max(1, i - win); j < jEnd; ++j)
        {
            double best = dp[i - 1][j - 1];
            if (j != i + win)  // on the upper edge, (i-1, j) is outside the band
                best = min(best, dp[i - 1][j]);
            if (j != i - win)  // on the lower edge, (i, j-1) is outside the band
                best = min(best, dp[i][j - 1]);
            dp[i][j] = fabs(a[i] - b[j]) + best;
        }
    }
    return dp[n - 1][n - 1];
}

// Sum of |a[i] - b[i]| over the indices both series have.
// (Previously iterated over all of `one` and could read past the end of
// `two` if the lengths differed.)
double getdistance2(int one, int two)
{
    const vector<double>& a = series[one].vec;
    const vector<double>& b = series[two].vec;
    const size_t n = min(a.size(), b.size());
    double ans = 0;
    for (size_t i = 0; i < n; ++i)
        ans += fabs(a[i] - b[i]);
    return ans;
}

// For every index i < n-1 adds |a[i]-b[i]|, |b[i]-a[i+1]| and |a[i]-b[i+1]|,
// then adds |a[n-1]-b[n-1]|.
double getdistance3(int one, int two)
{
    const vector<double>& a = series[one].vec;
    const vector<double>& b = series[two].vec;
    const int n = static_cast<int>(a.size());
    if (n == 0)
        return 0;

    double ans = 0;
    for (int i = 0; i < n - 1; ++i)
    {
        ans += fabs(a[i] - b[i]);
        ans += fabs(b[i] - a[i + 1]);
        ans += fabs(a[i] - b[i + 1]);
    }
    ans += fabs(a[n - 1] - b[n - 1]);
    return ans;
}

// Single-table banded recurrence: from (i-1, j) or (i, j-1) a cell is reached
// either by paying the within-series step or by paying coe * |a[i] - b[j]|.
// A predecessor is only used when it lies inside the band.
double getdistance4(int one, int two)
{
    const vector<double>& a = series[one].vec;
    const vector<double>& b = series[two].vec;
    const int n = static_cast<int>(b.size());
    if (n == 0)
        return 0;

    dp[0][0] = fabs(a[0] - b[0]);
    for (int k = 1; k < n; ++k)
    {
        dp[k][0] = dp[k - 1][0] + fabs(a[k - 1] - a[k]);
        dp[0][k] = dp[0][k - 1] + fabs(b[k - 1] - b[k]);
    }

    const int win = WIN;
    for (int i = 1; i < n; ++i)
    {
        const int jEnd = min(i + win + 1, n);
        for (int j = max(1, i - win); j < jEnd; ++j)
        {
            const double cross = fabs(a[i] - b[j]) * coe;
            double best = INF;
            if (j - i + 1 <= win)
            {
                best = min(best, dp[i - 1][j] + fabs(a[i - 1] - a[i]));
                best = min(best, dp[i - 1][j] + cross);
            }
            if (i - j + 1 <= win)
            {
                best = min(best, dp[i][j - 1] + fabs(b[j - 1] - b[j]));
                best = min(best, dp[i][j - 1] + cross);
            }
            dp[i][j] = best;
        }
    }
    return dp[n - 1][n - 1];
}

// Endpoints are compared index-to-index; every interior sample of `one` is
// compared with the closest of b[i-1], b[i], b[i+1].
//
// Fix: the last-sample term used to be added twice (once before and once
// after the loop); each index now contributes exactly once.
double getdistance5(int one, int two)
{
    const vector<double>& a = series[one].vec;
    const vector<double>& b = series[two].vec;
    const int n = static_cast<int>(a.size());
    if (n == 0)
        return 0;

    double ans = fabs(a[0] - b[0]);
    for (int i = 1; i < n - 1; ++i)
    {
        const double toPrev = fabs(a[i] - b[i - 1]);
        const double toSame = fabs(a[i] - b[i]);
        const double toNext = fabs(a[i] - b[i + 1]);
        ans += min(toPrev, min(toSame, toNext));
    }
    if (n > 1)
        ans += fabs(a[n - 1] - b[n - 1]);
    return ans;
}

// Loads the per-dataset table from numhelp.txt. Each record holds: an index
// (ignored), the dataset name, then the values stored in lens, num1, num2,
// classes and dev_mean. Terminates the program if the file cannot be opened.
void readInfo()
{
    ifstream file("/home/xiefubao/myproject/experiment/vldb_dataset/numhelp.txt");
    if (!file.is_open())
    {
        cout << "num.txt not open!" << endl;
        exit(1);  // was exit(0), which reported success to the caller
    }
    for (int i = 0; i < DATASET_COUNT; i++)
    {
        int no;
        string name;
        file >> no;
        file >> name;
        file >> lens[i];
        file >> num1[i];
        file >> num2[i];
        file >> classes[i];
        file >> dev_mean[i];
        names[i] = name;
    }
}

// Randomly removes num - int(num * (1 - down_ratio)) samples from each of the
// first `counter` series (keeping the remaining samples in order) and then
// updates the global `num` to the new length.
void down_sample(int counter)
{
    const int newnum = num * (1 - down_ratio);
    const int getout = num - newnum;
    for (int s = 0; s < counter; s++)
    {
        // `order` is shuffled from the back so that each draw picks an index
        // that has not been removed yet.
        vector<int> order(num, 0);
        vector<bool> keep(num, true);
        for (int j = 0; j < num; j++)
            order[j] = j;
        for (int j = 0; j < getout; j++)
        {
            const int position = rand() % (num - j);
            keep[order[position]] = false;
            swap(order[position], order[num - j - 1]);
        }

        vector<double> kept(newnum, 0);
        int filled = 0;
        for (int j = 0; j < num; j++)
        {
            if (keep[j])
                kept[filled++] = series[s].vec[j];
        }
        swap(series[s].vec, kept);
    }
    num = newnum;
}

// Loads dataset `filenum` (1-based) into `series`: the _TRAIN file with
// num2[filenum-1] records when `train` is true, otherwise the _TEST file with
// num1[filenum-1] records. Each record is a label followed by `num` values.
// Terminates the program if the file cannot be opened or would not fit into
// the fixed-size tables.
void readFile(int filenum, bool train)
{
    num = lens[filenum - 1];
    const string filepath = "/home/xiefubao/myproject/experiment/vldb_dataset/" + ::to_string(filenum) + "/" + names[filenum - 1] + (train ? "_TRAIN" : "_TEST") + ".txt";
    cout << filepath << endl;

    ifstream in(filepath.c_str());
    if (!in.is_open())
    {
        cout << "not open" << endl;
        exit(1);
    }

    const int number = train ? num2[filenum - 1] : num1[filenum - 1];
    // The distance functions index the DP tables up to `num` and `series` up
    // to `number - 1`; refuse input that would overflow them.
    if (num >= MAX_LEN || number > MAX_SERIES)
    {
        cout << "dataset too large for the static tables" << endl;
        exit(1);
    }

    for (int u = 0; u < number; u++)
    {
        string label;
        in >> label;
        series[u].label = label;
        series[u].vec.clear();

        double len = 0;
        for (int i = 0; i < num; i++)
        {
            double point;
            in >> point;
            series[u].vec.push_back(point);
            if (i != 0)
                len += fabs(point - series[u].vec[i - 1]);
        }
        series[u].len = len;
    }
    cout << "done 1" << endl;
    //down_sample(number);
}

// Stride used to sub-sample candidate neighbours: max(1, counter/classCount/5).
// A class count of 0 disables sub-sampling instead of dividing by zero.
static int samplingStride(int counter, int classCount)
{
    if (classCount == 0)
        return 1;
    return max(1, counter / classCount / 5);
}

// Scores one (window, coefficient) setting on the first `counter` series.
//
// Sets the globals WIN = wlen and coe = c. For every series i it keeps the
// three nearest candidates j (a deterministic subset selected by
// ((i+3737)*(j+4343)) % stride == 0) and adds u*u for the u-th popped
// candidate whose label equals that of i. The queue pops the farthest of the
// kept candidates first, so nearer matches receive larger weights.
// `filenum` is used directly as an index into `classes`.
int getvalue(int filenum, int wlen, double c, int counter, double(*distance) (int, int))
{
    int ans = 0;
    WIN = wlen;
    coe = c;

    const int stride = samplingStride(counter, classes[filenum]);
    const size_t prinum = 3;
    for (int i = 0; i < counter; i++)
    {
        priority_queue<pair<double, string> > pri;
        for (int j = 0; j < counter; j++)
        {
            if (i == j)
                continue;
            if (((i + 3737) * (j + 4343)) % stride != 0)
                continue;

            const double dis = distance(i, j);
            if (pri.size() < prinum)
            {
                pri.push(make_pair(dis, series[j].label));
            }
            else if (pri.top().first > dis)
            {
                pri.pop();
                pri.push(make_pair(dis, series[j].label));
            }
        }

        for (int u = 1; u <= static_cast<int>(prinum) && !pri.empty(); u++)
        {
            if (pri.top().second == series[i].label)
                ans += u * u;
            pri.pop();
        }
    }
    cout << "ans " << ans << endl;
    return ans;
}

// Tries windows lens[filenum]/2, then repeatedly divided by `speed` while the
// window is >= 2, and returns the window with the highest getvalue score
// (the smallest such window on ties). Returns -1 if no window was tried.
// `filenum` is used directly as an index into `lens`.
int find_win_size(int filenum, double c, int counter, double(*distance) (int, int))
{
    int max_value = -1;
    int len = -1;
    for (int wlen = lens[filenum] / 2; wlen >= 2; wlen = static_cast<int>(wlen / speed))
    {
        cout << wlen << endl;
        const int value = getvalue(filenum, wlen, c, counter, distance);
        if (value >= max_value)
        {
            max_value = value;
            len = wlen;
        }
    }
    return len;
}

// Tries coefficients 1, 1/1.8, 1/1.8^2, ... while above 0.001, using the
// current global WIN, and returns the one with the highest getvalue score
// (the smallest such coefficient on ties).
double find_ceo(int filenum, int counter, double(*distance) (int, int))
{
    int max_value = -1;
    double ans = -1;
    for (double rat = 1; rat > 0.001; rat /= 1.8)
    {
        const int value = getvalue(filenum, WIN, rat, counter, distance);
        if (value >= max_value)
        {
            max_value = value;
            ans = rat;
        }
    }
    return ans;
}

// For the selected dataset(s): loads the series, finds for every series the
// nearest randomly sub-sampled neighbour under each enabled distance, counts
// label matches, prints progress and appends one summary line to compare.txt.
int main()
{
    readInfo();

    ofstream result;
    result.open("/home/xiefubao/myproject/experiment/vldb_dataset/compare.txt", ios::app); // file location
    if (!result.is_open())
    {
        // Fail before the long computation rather than silently losing results.
        cout << "compare.txt not open" << endl;
        exit(1);
    }

    const bool train = false;
    for (int filenum = 38; filenum <= 38; filenum++)
    {
        /* Parameter search (disabled):
        down_ratio = 0.2;
        readFile(filenum, false);
        cout << "start find winsize" << endl;
        WIN = find_win_size(filenum - 1,dev_mean[filenum-1]/2,num2[filenum -1],getdistance0);
        cout << "start find coe" << endl;
        coe = find_ceo(filenum - 1,num2[filenum -1],getdistance0);
        cout << "have done" << endl;
        cout << "WIN && coe : " << WIN << " " << coe <<endl;
        */
        WIN = 5;
        coe = dev_mean[filenum - 1];

        vector<int> hitnum(5, 0);
        // Must match the number of records readFile loads for this mode.
        const int counter = train ? num2[filenum - 1] : num1[filenum - 1];
        cout << "reading" << endl;
        readFile(filenum, train);
        cout << "have read" << endl;
        cout << "xiefubao " << WIN << coe << endl;

        //double(*distance[5]) (int, int) = { getdistance00, getdistance0, getdistance1, getdistance2, getdistance3 };
        double(*distance[5]) (int, int) = { getdistance0, getdistance1, getdistance2 };
        // Slots of `distance` that are actually evaluated.
        vector<int> computeDis;
        computeDis.push_back(0);
        computeDis.push_back(1);
        computeDis.push_back(2);
        //computeDis.push_back(3);
        //computeDis.push_back(4);

        const int stride = samplingStride(counter, classes[filenum - 1]);
        for (int i = 0; i < counter; i++)
        {
            vector<double> mist(5, INF);   // best distance per slot
            vector<int> bestnum(5, -1);    // index of the best neighbour, -1 if none

            for (int j = 0; j < counter; j++)
            {
                if (i == j)
                    continue;
                if (rand() % stride != 0)
                    continue;

                vector<double> dist(5, 0);
                for (size_t k = 0; k < computeDis.size(); k++)
                    dist[computeDis[k]] = distance[computeDis[k]](i, j);
                for (size_t k = 0; k < computeDis.size(); k++)
                {
                    const int slot = computeDis[k];
                    if (dist[slot] < mist[slot])
                    {
                        mist[slot] = dist[slot];
                        bestnum[slot] = j;
                    }
                }
            }

            const bool report = (i % 100 == 0 || i == counter - 1);
            for (size_t k = 0; k < computeDis.size(); k++)
            {
                const int slot = computeDis[k];
                // Random sub-sampling can leave a series without any
                // candidate; that counts as a miss instead of indexing
                // series[-1].
                if (bestnum[slot] >= 0 && series[i].label == series[bestnum[slot]].label)
                    hitnum[slot]++;
                if (report)
                    cout << "distance" << slot << " hitsnum:" << hitnum[slot] << " / " << i + 1 << " " << counter << endl;
            }
            if (report)
                cout << endl;
        }

        // One summary line per dataset: id, hits per distance, total, WIN, coe.
        if (counter > 0)
        {
            result << setw(2) << setfill(' ') << filenum << " ";
            for (size_t k = 0; k < computeDis.size(); k++)
                result << setw(5) << setfill(' ') << hitnum[computeDis[k]] << " ";
            result << setw(5) << setfill(' ') << counter << " ";
            result << setw(4) << setfill(' ') << WIN << "    " << setw(7) << setfill(' ') << coe << endl;
        }

        cout << "series length: " << num << endl << endl;
    }
    return 0;
}

0 0
原创粉丝点击