C++ Parse URL

来源:互联网 发布:js 修改less 变量 编辑:程序博客网 时间:2024/05/17 03:30

urlregex.cpp

//sudo apt-get install libboost-all-dev;//g++ urlregex.cpp -lboost_regex#include <string>#include <iostream>#include <boost/regex.hpp> using std::string;using std::cout;using std::endl;using std::stringstream; void parse_url(const string& url) //with boost{    boost::regex ex("(http|https)://([^/ :]+):?([^/ ]*)(/?[^ #?]*)\\x3f?([^ #]*)#?([^ ]*)");    boost::cmatch what;    if(regex_match(url.c_str(), what, ex))     {        string protocol = string(what[1].first, what[1].second);        string domain   = string(what[2].first, what[2].second);        string port     = string(what[3].first, what[3].second);        string path     = string(what[4].first, what[4].second);        string query    = string(what[5].first, what[5].second);        cout << "[" << url << "]" << endl;        cout << protocol << endl;        cout << domain << endl;        cout << port << endl;        cout << path << endl;        cout << query << endl;        cout << "-------------------------------" << endl;    }} int main(int argc, char* argv[]){    parse_url("http://www.google.com");    parse_url("https://mail.google.com/mail/");    parse_url("https://www.google.com:443/webhp?gws_rd=ssl");    return 0;}

main.cpp (another example, this time without Boost)

#include <string>
#include <iostream>

using std::string;
using std::cout;
using std::endl;
using std::stringstream;

// Returns a copy of str with leading and trailing spaces, tabs, carriage
// returns and newlines removed. Empty or all-whitespace input yields "".
string _trim(const string& str)
{
    static const char* const blanks = " \n\r\t";

    const string::size_type start = str.find_first_not_of(blanks);
    if (start == string::npos)
    {
        return string(); // nothing but whitespace
    }
    const string::size_type until = str.find_last_not_of(blanks);
    return str.substr(start, until - start + 1);
}

// Splits a URL into protocol, domain, port, path and query by hand (no regex)
// and prints the labelled components to stdout.
//
// raw_url: the URL text; surrounding whitespace is ignored. Only the
//          "http://" and "https://" prefixes are recognised as a protocol;
//          without one, parsing starts at the first character and the
//          protocol is printed empty.
//
// The fragment (everything from the first '#') is discarded. A missing port,
// path or query is printed as an empty string.
void parse_url(const string& raw_url) //no boost
{
    const string x = _trim(raw_url);

    // Length of the recognised "scheme://" prefix, 0 when there is none.
    string::size_type offset = 0;
    if (x.compare(0, 8, "https://") == 0)
    {
        offset = 8;
    }
    else if (x.compare(0, 7, "http://") == 0)
    {
        offset = 7;
    }

    string protocol;
    if (offset > 0)
    {
        protocol = x.substr(0, offset - 3); // drop the trailing "://"
    }

    // The host[:port] part ends at the first '/', '?' or '#', so that
    // "http://host?q=1" and "http://host#frag" do not leak into the domain.
    const string::size_type authority_end = x.find_first_of("/?#", offset);
    string domain;
    string path;
    if (authority_end == string::npos)
    {
        domain = x.substr(offset);
    }
    else
    {
        domain = x.substr(offset, authority_end - offset);
        path   = x.substr(authority_end);
    }

    // Discard the fragment, then peel the query off the path.
    const string::size_type hash_pos = path.find('#');
    if (hash_pos != string::npos)
    {
        path.erase(hash_pos);
    }

    string query;
    const string::size_type qmark_pos = path.find('?');
    if (qmark_pos != string::npos)
    {
        query = path.substr(qmark_pos + 1);
        path.erase(qmark_pos);
    }

    // Separate an explicit ":port" from the host name.
    string port;
    const string::size_type colon_pos = domain.find(':');
    if (colon_pos != string::npos)
    {
        port = domain.substr(colon_pos + 1);
        domain.erase(colon_pos);
    }

    cout << "[" << raw_url << "]" << endl;
    cout << "protocol: " << protocol << endl;
    cout << "domain: " << domain << endl;
    cout << "port: " << port << endl;
    cout << "path: " << path << endl;
    cout << "query: " << query << endl;
}

// Demo driver: parses three sample URLs and prints their components.
int main(int argc, char* argv[])
{
    parse_url("http://www.google.com");
    parse_url("https://mail.google.com/mail/");
    parse_url("https://www.google.com:443/webhp?gws_rd=ssl");
    return 0;
}

main.cpp (one last example: the same parsing wrapped in an HTTPURL class)


#include <string>
#include <stdint.h>
#include <cctype>
#include <cstring>
#include <sstream>
#include <algorithm>
#include <iostream>

using std::cerr;
using std::cout;
using std::endl;
using std::string;

// Parsed view of an http/https/ftp URL.
//
// The constructor splits the URL once; the results are exposed through the
// read-only reference members protocol, domain, port, path and query.
//
//   protocol  scheme without "://", lower case; empty if not one of
//             https/http/ftp (matched case-insensitively)
//   domain    host name, lower-cased
//   port      explicit port, or the scheme default (https 443, ftp 21,
//             anything else 80) when absent, non-numeric or above 65535
//   path      path component, "/" when the URL has none
//   query     text after '?', without the '?'; the fragment is discarded
//
// Copying and assignment are supported: a copy's reference members refer to
// the copy's own fields, never to the source object.
class HTTPURL
{
    private:
        string _protocol;// http vs https
        string _domain;  // mail.google.com
        uint16_t _port;  // 80,443
        string _path;    // /mail/
        string _query;   // [after ?] a=b&c=b

    public:
        const string &protocol;
        const string &domain;
        const uint16_t &port;
        const string &path;
        const string &query;

        // Parses url (surrounding whitespace is ignored) into the fields above.
        HTTPURL(const string& url)
            : _port(0),
              protocol(_protocol), domain(_domain), port(_port), path(_path), query(_query)
        {
            const string u = _trim(url);
            const string lowered = _lowercase(u); // used for scheme matching only

            // Recognised "scheme://" prefixes; offset becomes the prefix length.
            static const char* const allowed[] = { "https://", "http://", "ftp://", NULL };
            string::size_type offset = 0;
            for (int i = 0; allowed[i] != NULL && offset == 0; i++)
            {
                const string::size_type len = std::strlen(allowed[i]);
                if (lowered.compare(0, len, allowed[i]) == 0)
                {
                    offset = len;
                    _protocol.assign(allowed[i], len - 3); // drop "://"
                }
            }

            uint16_t default_port = 80;
            if (_protocol == "https")
            {
                default_port = 443;
            }
            else if (_protocol == "ftp")
            {
                default_port = 21;
            }

            // host[:port] ends at the first '/', '?' or '#', so a query or
            // fragment directly after the host is not mistaken for part of it.
            const string::size_type authority_end = u.find_first_of("/?#", offset);
            string authority;
            string rest;
            if (authority_end == string::npos)
            {
                authority = u.substr(offset);
            }
            else
            {
                authority = u.substr(offset, authority_end - offset);
                rest      = u.substr(authority_end);
            }

            // Discard the fragment, then peel the query off the path.
            const string::size_type hash_pos = rest.find('#');
            if (hash_pos != string::npos)
            {
                rest.erase(hash_pos);
            }
            const string::size_type qmark_pos = rest.find('?');
            if (qmark_pos != string::npos)
            {
                _query = rest.substr(qmark_pos + 1);
                rest.erase(qmark_pos);
            }
            _path = rest.empty() ? string("/") : rest;

            // Separate an explicit ":port" from the host name.
            string urlport;
            const string::size_type colon_pos = authority.find(':');
            if (colon_pos != string::npos)
            {
                urlport = authority.substr(colon_pos + 1);
                authority.erase(colon_pos);
            }
            _domain = _lowercase(authority);
            _port   = _parse_port(urlport, default_port);
        }

        // Copies the parsed fields and binds the reference members to this
        // object's own storage (the implicit copy would alias other's fields).
        HTTPURL(const HTTPURL& other)
            : _protocol(other._protocol), _domain(other._domain), _port(other._port),
              _path(other._path), _query(other._query),
              protocol(_protocol), domain(_domain), port(_port), path(_path), query(_query)
        {
        }

        // Copies the parsed fields; the reference members keep pointing at
        // this object's own storage.
        HTTPURL& operator=(const HTTPURL& other)
        {
            _protocol = other._protocol;
            _domain   = other._domain;
            _port     = other._port;
            _path     = other._path;
            _query    = other._query;
            return *this;
        }

    private:
        // Returns input without leading/trailing spaces, tabs, CR and LF;
        // empty or all-whitespace input yields "".
        static inline string _trim(const string& input)
        {
            static const char* const blanks = " \t\n\r";
            const string::size_type startpos = input.find_first_not_of(blanks);
            if (startpos == string::npos)
            {
                return string();
            }
            const string::size_type endpos = input.find_last_not_of(blanks);
            return input.substr(startpos, endpos - startpos + 1);
        }

        // Lower-cases one char; the detour through unsigned char keeps
        // std::tolower defined for bytes with the high bit set.
        static inline char _lower_char(char c)
        {
            return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        }

        // Returns a lower-cased copy of input. (Deliberately not named
        // _tolower: POSIX <ctype.h> may define that name as a macro.)
        static inline string _lowercase(const string& input)
        {
            string str = input;
            std::transform(str.begin(), str.end(), str.begin(), _lower_char);
            return str;
        }

        // Converts a decimal port string to a number. Returns fallback when
        // text is empty, contains a non-digit, or exceeds 65535.
        static inline uint16_t _parse_port(const string& text, uint16_t fallback)
        {
            if (text.empty())
            {
                return fallback;
            }
            unsigned long value = 0;
            for (string::size_type i = 0; i < text.length(); ++i)
            {
                const char c = text[i];
                if (c < '0' || c > '9')
                {
                    return fallback;
                }
                value = value * 10 + static_cast<unsigned long>(c - '0');
                if (value > 65535UL)
                {
                    return fallback; // also keeps value from overflowing
                }
            }
            return static_cast<uint16_t>(value);
        }
};

// Demo driver: parses one sample URL and prints its components.
int main(int argc, char **argv)
{
    HTTPURL u("https://Mail.google.com:80/mail/?action=send#action=send");
    cout << "protocol: " << u.protocol << endl;
    cout << "domain: " << u.domain << endl;
    cout << "port: " << u.port << endl;
    cout << "path: " << u.path << endl;
    cout << "query: " << u.query << endl;
    return 0;
}


0 0
原创粉丝点击