Python connects to MongoDB

来源:互联网 发布:linux压缩和解压命令 编辑:程序博客网 时间:2024/05/01 07:00

Today, I finished connecting Python to MongoDB. It uses the pymongo module for this purpose.


I'll give the code directly, because it's very cold here, and I've got to go now...


import re
import json
import codecs
import urllib.request

from bs4 import BeautifulSoup
from pymongo import MongoClient

# Only absolute http(s) links are followed; relative hrefs and other schemes
# (mailto:, javascript:, ...) are ignored.
_ABSOLUTE_HTTP_LINK = re.compile(r'^https?://.+$')


def getHtml(url):
    """Fetch *url* and return the raw response body as bytes.

    The HTTP response is closed before returning. Any exception raised by
    ``urllib.request.urlopen`` (e.g. ``urllib.error.URLError``) propagates
    to the caller.
    """
    with urllib.request.urlopen(url) as page:
        return page.read()


def New(link, dep, son, far):
    """Record a discovered link in ``urllist`` and in the MongoDB collection.

    Args:
        link: URL of the discovered page.
        dep: Depth value stored with the record (the seed uses 1, children
            use their parent's value plus one).
        son: Initial count of links found on this page.
        far: URL of the page on which this link was found ("" for the seed).
    """
    url = {
        'link': link,
        'dep': dep,
        'son': son,
        'far': far,
    }
    urllist.append(url)
    # Debugging aid:
    # print(json.dumps(url, sort_keys=True, indent=4, separators=(',', ': ')))

    # insert_one replaces Collection.insert, which was deprecated in
    # PyMongo 3 and removed in PyMongo 4. It adds the generated ``_id`` to
    # ``url`` in place, which Develop relies on to update the record later.
    col.insert_one(url)


def PrintSoup(soup):
    """Write the prettified *soup* to ``soup.txt`` as UTF-8, overwriting it."""
    with codecs.open('soup.txt', 'w+', 'utf-8') as out:
        out.write(soup.prettify())


def Develop(point):
    """Expand the record at ``urllist[point]``.

    Downloads the page, registers every absolute http(s) link found in its
    ``<a>`` tags via ``New``, and stores the number of links found in the
    record's ``son`` field (both in memory and in MongoDB).

    Pages that cannot be fetched are skipped so that a single dead link does
    not abort the whole crawl.
    """
    url = urllist[point]
    link = url.get('link')

    try:
        html = getHtml(link)
    except OSError:
        # URLError, HTTPError and socket timeouts are all OSError subclasses.
        return

    soup = BeautifulSoup(html, 'html.parser')
    found = 0
    for anchor in soup.find_all('a'):
        # str() turns a missing href (None) into 'None', which never matches.
        href = str(anchor.get('href'))
        if _ABSOLUTE_HTTP_LINK.match(href):
            New(href, url.get('dep') + 1, 0, link)
            url['son'] += 1
            found += 1

    if found:
        # The document was inserted before its children were counted, so the
        # stored ``son`` would otherwise stay at its initial value forever.
        col.update_one({'_id': url['_id']}, {'$set': {'son': url['son']}})


client = MongoClient('localhost', 27017)
db = client.Links
col = db.Links

point = 0
urllist = []
New("https://image.baidu.com", 1, 0, "")

# Breadth-first expansion: stop when every known link has been expanded or
# once at least 3000 links are known. The limit is checked between pages, so
# the final list may exceed 3000 entries.
while point < len(urllist) < 3000:
    Develop(point)
    point += 1