蛋白质PDB文件读取C++代码

来源:互联网 发布:淘宝号实名认证步骤 编辑:程序博客网 时间:2024/04/28 07:56

在生物信息学中,PDB数据库是非常重要的蛋白质结构数据库。PDB数据文件的读取是进行蛋白质结构相关预测的重要步骤!下面给出了简单的PDB文件读取的C++代码,以供参考:


#include <stdio.h>#include <stdlib.h>#include <math.h>#include <string.h>#include <sstream>#include <iostream>#include <fstream>#include <vector>using namespace std;class Residue {public:vector<double*> atomxyzs;vector<string> atominfos;// each element for example : "ATOM      1  N   GLU    98    "int caind;int cbind;int res_pos;int orig_res_pos;Residue(vector<double*> _atomxyzs, vector<string> _atominfos, int _caind, int _cbind, int _res_pos, int _orig_res_pos){atomxyzs = _atomxyzs;atominfos = _atominfos;caind = _caind;cbind = _cbind;res_pos = _res_pos;orig_res_pos = _orig_res_pos;}};class Protein{private:string pdbpath;// invector<Residue> residues;// outpublic:Protein(string _pdbpath){pdbpath = _pdbpath;readPDB();}int size(){return residues.size();}Residue getIthResidue(int i){return residues[i];}int getIthResidueOriginalIndexInProt(int i){return residues[i].orig_res_pos;}double* getIthResidueCAXYZ(int i){return residues[i].atomxyzs[residues[i].caind];}double* getIthResidueCBXYZ(int i){return residues[i].atomxyzs[residues[i].cbind];}vector<Residue> getResidues(){return residues;}void save(vector<Residue> _residues, string savepath){ofstream fout(savepath);for (int i = 0; i < _residues.size(); i++){Residue res = _residues[i];for (int j = 0 ; j < res.atominfos.size(); j++){char temp[1000];string atominfo = res.atominfos[j];// "ATOM      1  N   GLU    98    "double* xyz = res.atomxyzs[j];//"  66.677"char xstr[9];char ystr[9];char zstr[9];sprintf(xstr, "%8.3f", xyz[0]);sprintf(ystr, "%8.3f", xyz[1]);sprintf(zstr, "%8.3f", xyz[2]);string useless = "  1.00  0.00";fout << atominfo << xstr << ystr << zstr << useless << endl;}}fout.close();}/*char* formatDouble(double v, int ch_num){char tmp[64];sprintf(tmp, "%8.3f %8.3f %8.3f\n", x[i][0], x[i][1], x[i][2]);}*/private:void readPDB(){vector<int> atomindex;vector<string> atominfos;vector<double*> points;int ca_pos_in_res = 0;int cb_pos_in_res = 0;int pre_index = -9999999;ifstream fin(pdbpath);string line;getline(fin, line);string lastline(line);bool isFirstATOM = true;bool isContainCA = false;int tmp_pos = 0;int res_pos = 1;int atom_pos = 1;while (fin.good()){if (line.compare(0, 3, "TER")==0)break;if (line.compare(0, 4, "ATOM")==0){char cstr[50];double x = 0.0;strcpy(cstr, (line.substr(30, 8)).c_str());sscanf(cstr, "%lf", &x);double y = 0.0;strcpy(cstr, (line.substr(38, 8)).c_str());sscanf(cstr, "%lf", &y);double z = 0.0;strcpy(cstr, (line.substr(46, 8)).c_str());sscanf(cstr, "%lf", &z);double* xyz = new double[3];xyz[0] = x;xyz[1] = y;xyz[2] = z;string atominfo = line.substr(0, 30);int now_index = -999999;strcpy(cstr, (line.substr(22, 4)).c_str());sscanf(cstr, "%d", &now_index);if (line.compare(13, 3, "CA ")==0){isContainCA = true;ca_pos_in_res = tmp_pos;if (isFirstATOM){pre_index = now_index;}}if (line.compare(13, 3, "CB ")==0){cb_pos_in_res = tmp_pos;}if (now_index != pre_index){if (isContainCA){if (pre_index != -9999999){Residue tmpResidue(points, atominfos, ca_pos_in_res, cb_pos_in_res, res_pos, pre_index);residues.push_back(tmpResidue);res_pos++;}points.clear();points.push_back(xyz);atominfos.clear();atominfos.push_back(atominfo);tmp_pos = 0;isContainCA = false;if (line.compare(13, 3, "CA ")==0){isContainCA = true;}}else{if (isFirstATOM){points.push_back(xyz);atominfos.push_back(atominfo);}}pre_index = now_index;}else{points.push_back(xyz);atominfos.push_back(atominfo);}isFirstATOM = false;tmp_pos++;}lastline = line;getline(fin, line);}if (isContainCA || lastline.length() >= 16 || lastline.compare(13, 3, "CA ")==0){Residue tmpResidue(points, atominfos, ca_pos_in_res, cb_pos_in_res, res_pos, pre_index);residues.push_back(tmpResidue);}fin.close();}};


1 1