【代码提取】提取一个文件夹里面的所有Java文件的类名函数名和变量名

来源:互联网 发布:cda数据分析师证 编辑:程序博客网 时间:2024/05/16 01:46

读取一个文件夹中的Java文件

#include<iostream>#include<fstream>#include<io.h>#include<vector>#include<string>#include<stdlib.h>using namespace std;string s(_pgmptr);char  filePath[] = "I:\\"; vector<string> files; void getFiles( string path, vector<string>& files ){//文件句柄long   hFile   =   0;//文件信息struct _finddata_t fileinfo;string p;if((hFile = _findfirst(p.assign(path).append("\\*").c_str(),&fileinfo)) !=  -1){do{//如果是目录,迭代之//如果不是,加入列表if((fileinfo.attrib &  _A_SUBDIR)){if(strcmp(fileinfo.name,".") != 0  &&  strcmp(fileinfo.name,"..") != 0)getFiles( p.assign(fileinfo.name), files );}else{files.push_back(p.assign(fileinfo.name) );}}while(_findnext(hFile, &fileinfo)  == 0);_findclose(hFile);}}int find(string s,string extension){int i = -1;for(;++i<s.length();){if(s[i] == '.' && s.substr(i+1,s.length()-i-1) == extension){return i;}}return -1;}string getStr(string name){return "\"" +s + "createCode.exe\" " + name;}int main(){cin>>filePath;////获取该路径下的所有文件  string temp(filePath);s = s.substr(0,s.find_last_of("\\")+1);ofstream fout((temp + "FileName.txt").c_str());cout<<s<<endl;getFiles(filePath, files);  char str[30];  int size = files.size();  int i,j = 0;int index;for (i = 0;i < size;i++)  {  index = find(files[i],"java");if(index!=-1){fout<<files[i]<<endl;system(getStr(temp + files[i].substr(0,index) + ".").c_str());}}  fout.close();return 0;}

上面的代码文件会调用以下代码来分析Java文件提取需要的信息

#pragma warning(disable: 4786)#include<iostream>#include<string>#include<fstream>#include<vector>#include<iterator>#include<ctype.h>#include<io.h>using namespace std;#define PACKAGE   8#define IMPORT    7#define CLASS     3#define VARIABLE  4#define FUNCTION  5#define UNDEINED  -1#define NOTFOUND  0#define HAVEFOUND 1/** * 目前出现的问题: * 1. 不用制表符首行推进,而是用空格。函数没有识别到。 */class C{public:string classname;//类的名字string extends;//类的extendsvector<string> implements;//类的implementsvector<string> var;//类的变量vector<string> function;//类的函数C(){extends = "";classname = "";}C(string extends){classname = "";this->extends = extends;}};vector<string> package;//package集合vector<string> import;//import集合vector<C> _c;//类的结合//char p[50] = "I:\\Body.java";//想要分析的代码文件路径//char q[50] = "I:\\Body.txt";//输出代码文件char * filePath = "I:\\Android\\sample\\res\\drawable-mdpi"; vector<string> files; int judge(string s);//判断字符串名字返回不同的值void D(string& str,char c);//在字符串str中循环删除字符cvoid D(string& str,string s);//删除所有指定的字符串void R(string& str);//以\r为判断删除注释void D(string& str);//以\t为判断删除所有注释vector<string> divideByTab(string &str);//以制表符为分隔符分解字符串成vectorvoid ignorespacetab(const string& str,int& fI);//fI停在非空格和制表符处void ignorealnum(const string&str ,int& fI);//fI停在非数字和字母处void display(vector<string>& vs);//输出vector<string>void display(vector<int>& vi);void display(vector<C>& vc);void display(ofstream& fout);//用文件输出流输出int find(string& str,string s,int& pos);//在pos处,str找sstring findClassName(const string& str,int &begin);//在一个字符串上找类名string findExtendsName(const string& str,int pos);//在一个字符串上找扩展名const vector<string> findImplementsName(const string& str,int pos);//implementsvoid actionscope_ignore(const string& str,int& fI);//忽略一个大的作用域中的所有作用域vector<int> actionscope(const string& str,int& fI);//获取最大的作用域的位置int main(int argc,char* argv[]){if(argc != 2)return 0;cout<<argv[0]<<endl;cout<<argv[1]<<endl;string p(argv[1]);p = p + "java";cout << p <<endl;string q(argv[1]);q = q + "txt";cout<<q<<endl;ifstream 
fin(p.c_str());//文件输入流,p是代码路径ofstream fout(q.c_str());string str;string temp;int pos = 0;while(getline(fin,temp,'\n')){str.append(temp+"\r\n\t");}R(str);//删除全部注释,跟D(temp)不一样的是 D(temp)以\t判断,这个以\r判断//注释里面出现package和import的话会出事囧while(find(str,"package",pos)){}//连续读取代码中的package名while(find(str,"import",pos)){}//连续读取代码中的import名while(find(str,"class",pos)){}//连续读取代码中的类/*display(package);cout<<endl;display(import);cout<<endl;display(_c);//输出类的信息*/display(fout);fin.close();fout.close();/*string str=" 123456789 ";cout<<str.find("4",0,1);*///string str2 = "start " + q;//system(str2.c_str());return 0;}int judge(string s){if(s=="package"){return PACKAGE;}else if(s=="import"){return IMPORT;}else if(s=="class"){return CLASS;}else if(s=="vaiabler"){return VARIABLE;}else if(s=="function"){return IMPORT;}return UNDEINED;}void D(string& str,char c){if(c == '='){//如果字符是 '=' , 则把 = 等号 和 ; 分号之间的信息除去,例如:int a = 5; 把=号、空格和5给擦除int index_s = 0;int index_e = 0;while(index_s<str.length()){index_s = str.find(c,index_s);//找 '=' 的位置if(index_s != string::npos){index_e = str.find(';',index_s+1);//找 ';' 的位置if(index_e != string::npos){str.replace(index_s,index_e-index_s,"");//擦除index_s = index_e;}else{str.erase(index_s);}}}return;}else if(c == ' '){int index = 0;while(index<str.length()){index = str.find(c,index);if(index != string::npos){int fI = index;//如果import的库里面有多个空格,那么仅仅保留一个while(index<str.length() && str[++index] == ' '){}if(index - 1 != fI){str.erase(fI,index - fI - 1);}}}return;}//除'='号以外的字符找到直接删除int index = 0;while(index<str.length()){index = str.find(c,index);if(index != string::npos){str.erase(index,1);}}}//除去str中的所有s字符串void D(string& str,string s){int index;do{index = str.find(s);if(index != string::npos){str.replace(index,s.length(),"");}else break;}while(1);}void R(string& str){int index;do{index = str.find("//");//找到 // 的位置if(index != string::npos){int index_n = str.find("\r\n",index+2);//找到 回车的位置//system("pause");if(index_n != string::npos){str.replace(index,index_n + 2 - 
index,"");}else if(index<str.length()){str.replace(index,str.length()-index + 1,"");}else{break;}}else break;}while(1);}//删除注释注意语法要正确。否则会删除一些有用的信息。//这里的语法正确是有首行推进//依据//和制表符来判别注释void D(string& str){int index;do{index = str.find("//");//找到 // 的位置if(index != string::npos){int index_n = str.find('\t',index+2);//找到 制表符的位置if(index_n != string::npos){str.replace(index,index_n - index,"");}else if(index<str.length()){str.replace(index,str.length()-index,"");}else{break;}}else break;}while(1);//接下来找/*和*/的注释do{index = str.find("/*");if(index != string::npos){int index_n = str.find("*/",index+2);if(index_n != string::npos){str.replace(index,index_n+2 - index,"");}else break;}else break;}while(1);}//要求是用制表符进行首行推进,而不是用四个空格。//根据制表符来分解一个string为一个vector容器vector<string> divideByTab(string &str){vector<string> vs;int index_s = 0;int index_e;index_s = str.find('\t',index_s);if(index_s != string::npos){index_e = str.find('\t',index_s+1);if(index_e != string::npos){do{if(index_s+1 != index_e){vs.push_back(str.substr(index_s+1 , index_e - index_s-1));}index_s = index_e;index_e = str.find('\t',index_e+1);}while(index_e != string::npos);}}if(index_s < str.length()){vs.push_back(str.substr(index_s+1,str.length() - index_s - 1));}return vs;}//忽略空格和制表符void ignorespacetab(const string& str,int &fI){while(fI<str.length() && (str[fI] == ' ' || str[fI] == '\t')){++fI;}}//忽略字母和数字void ignorealnum(const string&str ,int& fI){while(fI<str.length() && isalnum(str[fI])){++fI;}}//以下都是输出语句void display(vector<string>& vs){for(vector<string>::iterator b = vs.begin(); b!=vs.end();++b){cout<<*b<<endl;}}void display(vector<int>& vi){for(vector<int>::iterator b = vi.begin(); b!=vi.end();++b){cout<<*b<<endl;}}void display(vector<C>& vc){for(vector<C>::iterator b = vc.begin(); b!=vc.end();++b){cout<<b->classname<<" "<<b->extends<<endl;display(b->implements);display(b->var);display(b->function);}}void display(ofstream& fout){vector<string>::iterator b;int pos;for(b = package.begin(); 
b!=package.end();++b){fout<<*b<<endl;}fout<<endl;for(b = import.begin(); b!=import.end();++b){fout<<*b<<endl;}fout<<endl;vector<C>::iterator i;for(i = _c.begin(); i!=_c.end() ; ++i){fout<<i->classname;if(i->extends.size() != 0){fout<<" e "<<i->extends;}if(i->implements.size()!=0){fout<<" i";for(b = i->implements.begin(); b != i->implements.end(); ++b){fout<<" "<<*b;}}fout<<endl;for(b = i->var.begin(); b != i->var.end(); ++b){pos = 0;D(*b,"\r\n");ignorespacetab(*b,pos);if(pos != b->length()){fout<<b->substr(pos,b->length()-pos)<<endl;}}for(b = i->function.begin(); b != i->function.end(); ++b){pos = 0;D(*b,"\r\n");ignorespacetab(*b,pos);if(pos != b->length()){fout<<b->substr(pos,b->length()-pos)<<endl;}}}}/**0 1 2 3 4 5 6 7 ...  nIp a c k a g e   ...  ;fI , nI - fI 取得是fI 到 nI-1下标的子串*/int find(string& str,string s,int& pos){int type = judge(s);int fI,nI;//firstIndex,nextIndexstring temp = "";switch(type){case PACKAGE:case IMPORT:fI = str.find(s,pos);//先找到package或import的位置if(fI != string::npos){//判断package和import是否在class里面的注释里int cI = str.find("class",pos);if(cI != string::npos && cI<fI){return NOTFOUND;}fI+=type;//跳过package 或import 两个字符串(含一个空格)ignorespacetab(str,fI);//然后忽略剩余的空格或制表符,如果有nI = str.find(';',fI);//找到分号temp = str.substr(fI,nI-fI);//package名或import名//除去多余的制表符和空格D(temp,'\t');D(temp,' ');//PACKAGE 8 IMPORT 7if(type == 8){package.push_back(temp);}else if(type == 7){import.push_back(temp);}//pos位置为分号右边一位。pos = nI + 1;return HAVEFOUND;}else{return NOTFOUND;}break;case CLASS:fI = str.find(s,pos);//找到"class"if(fI != string::npos){fI += 6;//6 = strlen("class")+1;C theclass;//C类int lBlock = str.find('{',fI) ;// 找{if(lBlock != string::npos){++lBlock;string classline = str.substr(fI,lBlock-fI);//获得类信息的第一行int begin = 0;const string cn = findClassName(classline,begin);//classnameconst string en = findExtendsName(classline,begin);//extendsnameconst vector<string> in = findImplementsName(classline,begin);//implementsnametheclass.classname = cn;theclass.extends = 
en;theclass.implements = in;int cur_index = lBlock;//current_indexvector<int> vi = actionscope(str,cur_index);//获取函数和数组变量初始化等 { 和 } 的位置//display(vi);string temp = "";//排除所有作用域内的字符串for(vector<int>::iterator vit = vi.begin(); vit != vi.end(); vit += 2){temp += str.substr(*vit+1,*(vit+1)-*(vit)-1);}//temp.find_last_ofD(temp,"@Override");//删除@Override字符串D(temp);//删除注释D(temp,'=');//删除 = 号 和 ; 号之间的信息,包括=号,不包括;号vector<string> vs = divideByTab(temp);//根据制表符分解字符串int sem_index;//分号下标//根据分号来区分函数和变量for(vector<string>::iterator b = vs.begin(); b!=vs.end();++b){sem_index = b->find_last_of(';');if( sem_index != string::npos){theclass.var.push_back(b->substr(0,sem_index));}else{theclass.function.push_back(*b);}}_c.push_back(theclass);pos = fI + 1;//下一个搜索位置从fI开始,因为可能会出现类里面嵌套类的情况return HAVEFOUND;}}else{return NOTFOUND;}break;case VARIABLE:break;case FUNCTION:break;case UNDEINED:break;};return NOTFOUND;}string findClassName(const string& classline,int &begin){//cout<<classline<<endl;ignorespacetab(classline,begin);int CNS = begin;//classname_startignorealnum(classline,begin);int CNE = begin;//classname_end//cout<<classline.substr(CNS,CNE-CNS)<<endl;return classline.substr(CNS,CNE-CNS);}string findExtendsName(const string& str,int pos){int es = str.find("extends",pos);//extends_startif( es != string::npos ){es += 8;ignorespacetab(str,es);int ens = es;//extendsname_startignorealnum(str,es);int ene = es;//extendsname_end;return str.substr(ens,ene-ens);}return "";}const vector<string> findImplementsName(const string& str,int pos){vector<string> implements;int is = str.find("implements",pos);//implements_startif( is != string::npos){is += 11;ignorespacetab(str,is);int ins = is;//implements_name_startint ine = is;//implements_name_endwhile(is<str.length()){if(str[is] == ' ' || str[is] == '\t'){++is;continue;}if(str[is] == ',' || str[is] == '{'){//cout<<str.substr(ins,ine-ins);implements.push_back(str.substr(ins,ine-ins));ins = ++is;continue;}ine = ++is;}}return 
implements;}//以下是判断作用域的位置,并返回合适的 { 和 } 位置来将变量名和函数名分割出来。void actionscope_ignore(const string& str,int& fI){int lBlock_num = 1;while(lBlock_num){++fI;if(str[fI] == '{'){++lBlock_num;}else if(str[fI] == '}'){--lBlock_num;}}}vector<int> actionscope(const string& str,int& fI){vector<int> index;//cout<<fI-1<<endl;index.push_back(fI-1);int lBlock_num = 1;while(lBlock_num){//cout<<str[fI];if(str[fI] == '{'){//cout<<fI<<endl;index.push_back(fI);//获取'{'的下标actionscope_ignore(str,fI);//cout<<fI<<endl;index.push_back(fI);//获得匹配上面的'{'的'}'的下标}else if(str[fI] == '}'){lBlock_num = 0;//cout<<fI<<endl;index.push_back(fI);continue;}++fI;}return index;}

在第一个代码文件的exe文件中输入Java文件所在目录

单斜杠 \ 也是可以的

回车以后生成对应的txt文件

Java文件所在目录

生成的txt文件

以上两个代码文件编译出的exe文件需要放在同一个目录下:1.exe由第一个代码编译得到,createCode.exe由第二个代码编译得到。


0 0
原创粉丝点击