[2016/11/30]项目V1.0：计算两个氨基酸之间的中心碳原子距离和最近距离

来源：互联网发布：pymongo 创建数据库编辑：程序博客网时间：2024/05/01 01:34

代码功能

给出若干文件，计算出文件中特定肽链中特定某几个氨基酸间的中心碳原子距离和最近距离，并输出到文件。
（输出到文件这里，并不确定怎样的数据好处理，所以还没写完，想好了补上。另外，计算特定肽链这个功能，只能在代码里改，不好，日后完善。）

缺点

1.链表的数据存储方式简直不能再麻烦，线性表加链表的方式更好，如果有时间再完善这个。
2.一次算完所有肽链的数据，不好。应该一条一条算，算完释放内存，再接着分配，这样可以优化内存使用。还是有空再改。

一些吐槽

简直脑子一热就接了这个工作，orz。
c语言处理文本简直太麻烦哦嗬嗬嗬
用完内存还要释放好麻烦（不开心脸）
还是python大法好啊！！

代码

运行环境：linux Ubuntu

makefile:

project : main.o \          biobjects.o\          config.o\          getdatafunc.o\          globaltool.o\    gcc -o project main.o biobjects.o config.o getdatafunc.o -lm globaltool.o\        && ./projectmain.o : main.cpp    gcc -c main.cppbiobjects.o : biobjects.cpp    gcc -c biobjects.cppconfig.o : config.cpp    gcc -c config.cppgetdatafunc.o : getdatafunc.cpp -lm    gcc -c getdatafunc.cppglobaltool.o : globaltool.cpp    gcc -c globaltool.cppclean :    rm project main.o biobjects.o config.o getdatafunc.o globaltool.o

main.cpp

#include "config.h"#include "getdatafunc.h"FILE * openFile(int n){    char mode_filename[50];    memset(mode_filename, 0, 50);    sprintf(mode_filename, "%s", FILE_NAME);    char target_string[2] = {0};    sprintf(target_string, "0");    char* p = NULL;    p = strstr(mode_filename, target_string);    int len_front = p - mode_filename;    char filename[50] = {0};    for(int i = 0; i<len_front; i++)        filename[i] = mode_filename[i];    sprintf(filename + strlen(filename), "%d", n);    int pianyi = strlen(filename);    int count = 0;    for(int i = len_front+1; i<strlen(mode_filename); i++){        filename[pianyi + count++] = mode_filename[i];    }    FILE *f;    if (NULL == (f = fopen(filename ,"r"))){        printf("open file failed\n");        return NULL;    }    printf("%s\n",filename);    return f;}int main(){    //printf("2333");    output_file = open(OUTPUT_FILE_NAME, O_RDWR|O_CREAT, 0666);    int i;    for(i = BEGIN_FILE_NUM; i<=END_FILE_NUM; i++){        //printf("line :%d\n",__LINE__);        FILE *f = openFile(i);        //写入        const char *massage_of_file = "aa_number : ";        char current[6];        write(output_file, massage_of_file, strlen(massage_of_file));        for(int j = 0; j<AA_AMOUNT; j++){            memset(current, 0, sizeof(current));            sprintf(current, "%d", aa_list[j]);            write(output_file, current, strlen(current));            write(output_file, seperet, sizeof(char));        }        write(output_file, change_line, strlen(change_line));        //写入        getChainType(f); //算出肽链数量，找出每条的名字        printf("%d\n",chain_amount);        getMassage(chain_amount, f);        printf("--------------get information finished!------------\n");        int k;        for(k = 0; k < chain_amount; k++){            if(k == 2)                cacuDistance(k);            //printf("2333\n");        }        AA* aa_point;        AA* aa_temp_point;        AA* aa_temp_pre_point;        atom* atom_point;        atom* atom_temp_point;        atom* atom_temp_pre_point;        for(int i = 0; i<chain_amount; i++){            aa_point = head[i].next;            while(aa_point ->aa_next){                aa_temp_point = aa_point;                while(aa_temp_point->aa_next){                    aa_temp_pre_point = aa_temp_point;                    aa_temp_point = aa_temp_point ->aa_next;                }                atom_point = aa_temp_point ->atom_next;                while(atom_point ->atom_next){                    atom_temp_point = atom_point;                    while(atom_temp_point ->atom_next){                        atom_temp_pre_point = atom_temp_point;                        atom_temp_point = atom_temp_point ->atom_next;                    }                //  printf("line :%d\n",__LINE__);                    free(atom_temp_point);                    atom_temp_pre_point ->atom_next = NULL;                }                free(atom_point);                aa_temp_point ->atom_next = NULL;                //printf("line :%d\n",__LINE__);                free(aa_temp_point);                aa_temp_pre_point ->aa_next = NULL;            }            free(aa_point);            head[i].next = NULL;        }        memset(chain_type, 0, MAX_CHAIN_AMOUTN);        fclose(f);    }    close(output_file);    return 0;}

globaltool.h

#ifndef _GLOBALTOOL_H#define _GLOBALTOOL_H#include <stdio.h>#include <stdlib.h>#include <string.h>#include <limits.h>#include <float.h>#include <unistd.h>#include <sys/types.h>#include <sys/stat.h>#include <fcntl.h>#include <math.h>extern const char *seperet;extern const char *change_line;extern const char* mid_carbon_explan;extern const char* nearest_atom_explan;extern const char* nearest_atom_distance_explan;//以上都是写入文件要用到的参数，可能鸡肋extern int chain_amount; //肽链数量extern int output_file;//数据结果整理出来后，输出到文件。此文件的文件描述符。extern char buf[256];//随便用的缓冲区#endif

globaltool.cpp

#include "globaltool.h"const char *seperet = " ";const char *change_line = "\r\n";const char* mid_carbon_explan = "center_carbon_distance : ";const char* nearest_atom_explan = "nearet_atom : ";const char* nearest_atom_distance_explan = "nearet_atom_distance : ";int chain_amount;int output_file;char buf[256];

config.h

#ifndef _CONFIG_H#define _CONFIG_H#include "biobjects.h"#define UNAMEING_CHAIN_NAME '*' //给未命名肽链的名字#define CHAIN_TYPE_POS 21 //肽链类型位置#define AA_AMOUNT 9 //要计算的氨基酸数量#define FILE_AMOUNT 1//文件数量#define FILE_ROOT "./"//文件路径#define FILE_NAME "pep0-bestmodel-SC-min.pdb"//文件名格式，把文件中的数字替换成0#define BEGIN_FILE_NUM 1//从第几个文件开始处理#define END_FILE_NUM 1//从第几个文件结束#define OUTPUT_FILE_NAME "output.txt"extern int aa_list[AA_AMOUNT]; //要计算的氨基酸序号extern target_aa_dis aa_distance[AA_AMOUNT][AA_AMOUNT];//目标氨基酸的距离#endif

config.cpp

#include "config.h"int aa_list[AA_AMOUNT] = {85,91,94,109,113,117,118,120,138}; //序号必须不同，是要计算的氨基酸序号target_aa_dis aa_distance[AA_AMOUNT][AA_AMOUNT]; //暂时没啥用

biobjects.h

#ifndef _BIOBJECTS_H#define _BIOBJECTS_H#define MAX_CHAIN_AMOUTN 10//原子typedef struct atom{    char atom_name[6];    double dis_x;    double dis_y;    double dis_z;    atom *atom_next;}atom;//氨基酸typedef struct AA{    int order_number;    char AAname[6];    AA *aa_next;    atom *atom_next;}AA;//肽链信息typedef struct chain_head{    char chain_name;    int chain_len;    AA *next;}chain_head;//目标氨基酸位置信息typedef struct target_aa_dis{    char atom_1[6];    char atom_2[6];    double min_atom_dis;    double central_carbon_dis;}target_aa_dis;extern char chain_type[MAX_CHAIN_AMOUTN];//保存肽链名字extern chain_head head[MAX_CHAIN_AMOUTN];//肽链头部，存储信息用//存储信息#endif

biobjects.cpp

#include "biobjects.h"char chain_type[MAX_CHAIN_AMOUTN];chain_head head[MAX_CHAIN_AMOUTN];

getdatafunc.h

#ifndef _GETDATAFUNC_H#define _GETDATAFUNC_H#include "config.h"#include "globaltool.h" void getChainType(FILE *f);//先得出肽链种类void readInformationForUnamedChain(int chain_type_point);//读没名字的肽链void readInformationForNamedChain(int chain_type_point);//读有名字的肽链int getMassage(int cacu, FILE *f);//整理数据double getDistanceByCoordinate(atom *p1, atom *p2);//计算两个原子之间距离void cacuDistance(int chain_type_point);//计算两个aa中心c原子距离，计算两个aa的最小距离#endif

getdatafunc.cpp

#include "getdatafunc.h"void getChainType(FILE *f){    int chain_type_num = 0;    bool end = 0;    char last_head[4] = {0};    char type;    int chain_len = 0;    while(1){        //特殊情况处理        if (NULL == fgets(buf, 256, f)){ printf("___________\n"); break;}        if (buf[0] == '\n') continue;        //另开新的肽链 or 没有可读                            if ((buf[0] != 'A') && (buf[1] != 'T')){            //开新肽链            if(buf[0] == 'T' && buf[2] == 'R'){                //printf("2333");                if(buf[CHAIN_TYPE_POS] == ' ')                    head[chain_type_num].chain_name = UNAMEING_CHAIN_NAME;                else                    head[chain_type_num].chain_name = buf[CHAIN_TYPE_POS];                head[chain_type_num].chain_len = chain_len;                chain_len = 0;                chain_type_num ++;                printf("chain name : %c,  num:%d\n",head[chain_type_num - 1].chain_name, chain_type_num - 1);            }            else if(strcmp(last_head, "TER") == 0){                chain_amount = chain_type_num;                break;            }        }        else{            chain_len ++; //其实算的是原子个数        }        last_head[0] = buf[0];        last_head[1] = buf[1];        last_head[2] = buf[2];        //printf("%s\n", last_head);    }    //指针移到开头    fseek(f, 0, SEEK_SET);    return;}void readInformationForUnamedChain(int chain_type_point){    AA* aa_point = head[chain_type_point].next;    atom *atom_point;    int line;    char atom_name[6] = {0};    char aa_name[6] = {0};    int aa_order_number;    double atom_dis_x;    double atom_dis_y;    double atom_dis_z;    sscanf(buf+5, "%d%s%s%d%lf%lf%lf",\        &line, atom_name, aa_name, &aa_order_number, &atom_dis_x, &atom_dis_y, &atom_dis_z);    //printf("%d %s %s %d %lf %lf %lf\n",\        line, atom_name, aa_name, aa_order_number, atom_dis_x, atom_dis_y, atom_dis_z);    if(aa_point){//非肽链起始                    while(aa_point ->aa_next) //找到尾部            aa_point = aa_point ->aa_next;    }    else{//肽链起始        if((aa_point = (AA *)malloc(sizeof(struct AA))) == NULL){            printf("LINE %d, malloc error\n",__LINE__);            exit(0);        }        aa_point ->aa_next = NULL;        aa_point ->atom_next = NULL;        head[chain_type_point].next = aa_point;        aa_point->order_number = -1;    }    if (aa_order_number != aa_point->order_number){ //新氨基酸的新原子        //新氨基酸        if((aa_point->aa_next = (AA *)malloc(sizeof(struct AA))) == NULL){            printf("LINE %d, malloc error\n",__LINE__);            exit(0);        }        aa_point = aa_point ->aa_next;        aa_point ->order_number = aa_order_number;        memset(aa_point ->AAname, 0, 6);        sprintf(aa_point ->AAname, "%s", aa_name);        aa_point -> aa_next = NULL;        aa_point ->atom_next = NULL;        //新原子        if((aa_point->atom_next = (atom *)malloc(sizeof(struct atom))) == NULL){            printf("LINE %d, malloc error\n",__LINE__);            exit(0);        }        atom_point = aa_point ->atom_next;        aa_point->atom_next->atom_next = NULL;    }    else{   //当前氨基酸的新原子        atom_point = aa_point ->atom_next;        while(atom_point ->atom_next)            atom_point = atom_point ->atom_next;        if((atom_point->atom_next = (atom *)malloc(sizeof(struct atom))) == NULL){            printf("LINE %d, malloc error\n",__LINE__);            exit(0);        }        atom_point = atom_point ->atom_next;    }    //原子存储    atom_point ->dis_x = atom_dis_x;    atom_point ->dis_y = atom_dis_y;    atom_point ->dis_z = atom_dis_z;    atom_point ->atom_next = NULL;    memset(atom_point ->atom_name, 0, 6);    sprintf(atom_point ->atom_name, "%s", atom_name);    //printf("%d %s %s %d %lf %lf %lf\n",\        line, atom_point->atom_name, aa_point ->AAname, \            aa_point->order_number, atom_point ->dis_x, atom_point ->dis_y, atom_point ->dis_z);}void readInformationForNamedChain(int chain_type_point){    AA* aa_point = head[chain_type_point].next;    atom *atom_point;    int line;    char atom_name[6] = {0};    char aa_name[6] = {0};    char chain_name[6] = {0};    int aa_order_number;    double atom_dis_x;    double atom_dis_y;    double atom_dis_z;    sscanf(buf+5, "%d%s%s%s%d%lf%lf%lf",\        &line, atom_name, aa_name, chain_name, &aa_order_number, &atom_dis_x, &atom_dis_y, &atom_dis_z);//  printf("%s %d %s %s %d %lf %lf %lf\n",\        chain_name, line, atom_name, aa_name, aa_order_number, atom_dis_x, atom_dis_y, atom_dis_z);    if(aa_point){//非肽链起始                    while(aa_point ->aa_next) //找到尾部            aa_point = aa_point ->aa_next;    }    else{//肽链起始        if((aa_point = (AA *)malloc(sizeof(struct AA))) == NULL){            printf("LINE %d, malloc error\n",__LINE__);            exit(0);        }        aa_point ->aa_next = NULL;        aa_point ->atom_next = NULL;        head[chain_type_point].next = aa_point;        aa_point->order_number = -1;    }    if (aa_order_number != aa_point->order_number){ //新氨基酸的新原子        //新氨基酸        if((aa_point->aa_next = (AA *)malloc(sizeof(struct AA))) == NULL){            printf("LINE %d, malloc error\n",__LINE__);            exit(0);        }        aa_point = aa_point ->aa_next;        aa_point ->order_number = aa_order_number;        memset(aa_point ->AAname, 0, 6);        sprintf(aa_point ->AAname, "%s", aa_name);        aa_point -> aa_next = NULL;        aa_point ->atom_next = NULL;        //新原子        if((aa_point->atom_next = (atom *)malloc(sizeof(struct atom))) == NULL){            printf("LINE %d, malloc error\n",__LINE__);            exit(0);        }        aa_point ->atom_next ->atom_next = NULL;        atom_point = aa_point ->atom_next;    }    else{   //当前氨基酸的新原子        atom_point = aa_point ->atom_next;        while(atom_point ->atom_next)            atom_point = atom_point ->atom_next;        if((atom_point->atom_next = (atom *)malloc(sizeof(struct atom))) == NULL){            printf("LINE %d, malloc error\n",__LINE__);            exit(0);        }        atom_point ->atom_next ->atom_next = NULL;        atom_point = atom_point ->atom_next;    }    //原子存储    atom_point ->dis_x = atom_dis_x;    atom_point ->dis_y = atom_dis_y;    atom_point ->dis_z = atom_dis_z;    atom_point ->atom_next = NULL;    memset(atom_point ->atom_name, 0, 6);    sprintf(atom_point ->atom_name, "%s", atom_name);//  printf("%d %s %s %d %lf %lf %lf\n",\        line, atom_point->atom_name, aa_point ->AAname, \            aa_point->order_number, atom_point ->dis_x, atom_point ->dis_y, atom_point ->dis_z);}int getMassage(int cacu, FILE *f){    int chain_tpye_point = 0;    while(cacu){        if(head[chain_tpye_point].chain_name == UNAMEING_CHAIN_NAME){            printf("line :%d     cacu = %d.\n",__LINE__,cacu);            while(1){                if (NULL == fgets(buf, 256, f)) {                    cacu = 0;                //  printf("LINE %d,can't get a line\n",__LINE__);                    break;                }                if ((buf[0] != 'A') && (buf[1] != 'T')){                    if(buf[0] == 'T'&& buf[2] == 'R'){                        cacu--;                        //printf("%c\n",head[chain_tpye_point].chain_name);                        chain_tpye_point++;                        break;                    }                    continue;                }                else if((buf[0] == 'A') && (buf[1] == 'T')){                    //printf("yes!\n");                    readInformationForUnamedChain(chain_tpye_point);                }            }        }        else{            //printf("line :%d     cacu = %d.\n",__LINE__,cacu);            while(1){                if (NULL == fgets(buf, 256, f)) {                    cacu = 0;                    printf("LINE %d,can't get a line\n",__LINE__);                    break;                }                if ((buf[0] != 'A') && (buf[1] != 'T')){                    if(buf[0] == 'T' && buf[2] == 'R'){                        cacu--;                        chain_tpye_point++;                        break;                    }                    continue;                }                else if((buf[0] == 'A') && (buf[1] == 'T')){                    readInformationForNamedChain(chain_tpye_point);                }            }           }    }}double getDistanceByCoordinate(atom *p1, atom *p2){    double x1 = p1 -> dis_x;    double y1 = p1 -> dis_y;    double z1 = p1 -> dis_z;     double x2 = p2 -> dis_x;    double y2 = p2 -> dis_y;    double z2 = p2 -> dis_z; //  printf("%lf %lf %lf\n", p1 -> dis_x,  p1 -> dis_y,  p1 -> dis_z);//  printf("%lf %lf %lf\n", p2 -> dis_x,  p2 -> dis_y,  p2 -> dis_z);    double d1 = (x1 - x2) * (x1 - x2);    double d2 = (y1 - y2) * (y1 - y2);    double d3 = (z1 - z2) * (z1 - z2);    double result = sqrt(d1 + d2 + d3);//  printf("res: %lf\n",result);    return result;}void cacuDistance(int chain_type_point){    printf("hhhhhhhhh %d\n",chain_type_point);    printf("---name :%c---", head[chain_type_point].chain_name);    int amount = AA_AMOUNT;//要计算几个氨基酸    AA* aa_point = head[chain_type_point].next;    AA target_aa[AA_AMOUNT];    //获取目标氨基酸，把它们的信息存储起来    while(amount && (aa_point !=NULL)){    //  printf("aa_point->order_number:%d\n",aa_point->order_number);        for(int i = 0; i<AA_AMOUNT; i++){            //printf("%d\n",i);            if((aa_point ->order_number) == aa_list[i]){                memset(target_aa[i].AAname, 0, 6);                sprintf(target_aa[i].AAname, "%s", aa_point ->AAname);            //  printf("xiabiao: %d,num :%d, name:%s\n",i,aa_list[i], target_aa[i].AAname);                target_aa[i].atom_next = aa_point ->atom_next;                target_aa[i].order_number = aa_point ->order_number;                amount--;                break;            }        }        aa_point = aa_point -> aa_next;    }    //printf("line :%d-----debug-----\n",__LINE__);    double min_dis = DBL_MAX;    double two_atom_dis;    double cc_atom_dis;    AA *aa_one;    AA *aa_two;    atom *atom_aa_one;    atom *atom_aa_two;    atom *atom_aa_two_temp;    char min_dis_atom_aa_one[6];    char min_dis_atom_aa_two[6];    int i, j;    printf("-------------- <肽链%c> ----------------\n", head[chain_type_point].chain_name);    double x,y,z;    for(i = 0; i<AA_AMOUNT; i++){        for(j = i + 1; j<AA_AMOUNT; j++){            //if(i == j) continue;            min_dis = DBL_MAX;            aa_one = &target_aa[i];            aa_two = &target_aa[j];            atom_aa_one = aa_one->atom_next;            atom_aa_two = aa_two->atom_next;            printf(">>正在计算的氨基酸标号是：%d and %d \n",\                aa_one->order_number, aa_two->order_number);            while(atom_aa_one){                atom_aa_two_temp = atom_aa_two;                while(atom_aa_two_temp){                    if((strcmp("C", atom_aa_two_temp ->atom_name) == 0) \                        && (strcmp(atom_aa_one ->atom_name, atom_aa_two_temp ->atom_name) == 0)){                            cc_atom_dis = getDistanceByCoordinate(atom_aa_one, atom_aa_two_temp);                            printf(">>中心碳原子之间的距离是： %lf \n",cc_atom_dis);/*                            write(output_file, mid_carbon_explan, strlen(mid_carbon_explan));                            char current[sizeof(double)];                            memset(current, 0, sizeof(current));                            sprintf(current, "%lf", cc_atom_dis);                            write(output_file, current, strlen(current));                            write(output_file, change_line, strlen(change_line));*/                    }                    two_atom_dis = getDistanceByCoordinate(atom_aa_one, atom_aa_two_temp);                    //printf("%lf\n",two_atom_dis);                    if(two_atom_dis < min_dis){                        min_dis = two_atom_dis;                        memset(min_dis_atom_aa_one, 0, 6);                        sprintf(min_dis_atom_aa_one, "%s", atom_aa_one ->atom_name);                        memset(min_dis_atom_aa_two, 0, 6);                        sprintf(min_dis_atom_aa_two, "%s", atom_aa_two_temp ->atom_name);                        x = atom_aa_two_temp ->dis_x;                        y = atom_aa_two_temp ->dis_y;                        z = atom_aa_two_temp ->dis_z;                    }                    atom_aa_two_temp = atom_aa_two_temp -> atom_next;                }                atom_aa_one = atom_aa_one -> atom_next;            }            printf("   离得最近的原子是：%d 号氨基酸的 %s 原子 和 %d 号氨基酸的 %s 原子\n",\                aa_list[i], min_dis_atom_aa_one, aa_list[j], min_dis_atom_aa_two);            printf("%lf,%lf,%lf\n",x,y,z);            printf("   它们的距离是 %lf \n\n", min_dis);/*            memset(buf, 0, sizeof(buf));            write(output_file, nearest_atom_explan, strlen(nearest_atom_explan));            sprintf(buf, "%s %s\r\n", min_dis_atom_aa_one, min_dis_atom_aa_two);            write(output_file, buf, strlen(buf));            memset(buf, 0, sizeof(buf));            sprintf(buf, "%s%lf\r\n", nearest_atom_distance_explan, min_dis);            write(output_file, buf, strlen(buf));*/        }    }}

0 0