python读网页解析内容写入文件

来源:互联网 发布:网络打印机安装步骤 编辑:程序博客网 时间:2024/05/21 15:45

快速入门 http://developer.51cto.com/art/201207/347006.htm

注意编译指令有一处错误

python -O -m py_compile hello.py

工程发布

http://blog.csdn.net/u011465933/article/details/9567375


下面是相关代码

"""Poll a device's /goform/formIPQAM page and append every response to a log.

The script asks for the device IP once, then loops forever: it fetches the
first page of the listing, reads the total count out of the response, logs a
timestamped header, logs the page(s), logs an end marker and sleeps for
POLL_INTERVAL_SECONDS before polling again.

The log file is created in the current working directory and is named
"<YYYY-MM-DD>_<ip>", so a new file is started each day.
"""
import sys
import urllib
import random
import time

# Python 2 / Python 3 shims. The script was written for Python 2
# (urllib.urlopen, raw_input); these aliases let the same file run on both.
_PY3 = sys.version_info[0] >= 3
if _PY3:
    from urllib.request import urlopen as _UrlOpen
    _ReadLine = input
    # latin-1 maps every byte to exactly one character, so decoding the
    # response with it and writing the log with it reproduces the device's
    # bytes unchanged, which is what the Python 2 version did.
    _TEXT_ENCODING = 'latin-1'
    _LOG_OPEN_KWARGS = {'encoding': _TEXT_ENCODING}
else:
    _UrlOpen = urllib.urlopen
    _ReadLine = raw_input
    _TEXT_ENCODING = None
    _LOG_OPEN_KWARGS = {}

# Amount the row index advances after each follow-up request.
# NOTE(review): presumably the number of rows the device returns per page;
# confirm against the device's web UI.
PAGE_STEP = 16

# Pause between two polling rounds, in seconds (one hour).
POLL_INTERVAL_SECONDS = 3600


def GetNowTime():
    """Return the local date and time as 'YYYY-MM-DD HH:MM:SS'."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())


def GetDayTime():
    """Return the local date as 'YYYY-MM-DD'."""
    return time.strftime("%Y-%m-%d", time.localtime())


def WriteFile(ip, dat):
    """Append the string *dat* to today's log file for device *ip*.

    The file is "<YYYY-MM-DD>_<ip>" in the current working directory and is
    created if it does not exist. On Python 3 the file is written as latin-1
    (see _LOG_OPEN_KWARGS), so *dat* must only contain characters in that
    range; text returned by GetHtml always does.
    """
    logPath = GetDayTime() + "_" + ip
    # 'with' guarantees the handle is closed even if the write fails.
    with open(logPath, 'a+', **_LOG_OPEN_KWARGS) as output:
        output.write(dat)


def GetHtml(url):
    """Fetch *url* and return the response body as a native str.

    Network errors raised by urlopen propagate to the caller.
    """
    response = _UrlOpen(url)
    try:
        body = response.read()
    finally:
        # Python 2's urlopen result is not a context manager, so close
        # explicitly instead of using 'with'.
        response.close()
    if _TEXT_ENCODING is not None and not isinstance(body, str):
        body = body.decode(_TEXT_ENCODING)
    return body


def CountTasks(html):
    """Extract (rowCount, totalCount) from one response body.

    rowCount is the number of '<*1*>' separators in *html*. When there is at
    least one, totalCount is the integer found after the last '<*2*>' marker
    of the final segment, read up to the next '<' (or the end of the text).
    When there is no '<*1*>' separator the result is (0, 0).

    Raises ValueError if that trailing field is not an integer.
    """
    segments = html.split('<*1*>')
    rowCount = len(segments) - 1
    if rowCount <= 0:
        return 0, 0
    tail = segments[-1].split('<*2*>')[-1]
    return rowCount, int(tail.split('<')[0])


def PollOnce(devIp, baseUrl):
    """Run one polling round for *devIp*, log it, and return the total count.

    Writes a timestamped header line, then the first page (only if it
    contained rows), then keeps requesting further pages, advancing the row
    index by PAGE_STEP, until the index reaches the reported total.
    """
    # The random 'ran' value makes every URL unique.
    # NOTE(review): presumably a cache-buster expected by the device.
    html = GetHtml(baseUrl + str(random.uniform(0, 1)) + '&rowIndex=0')
    nIndx, totalTskNum = CountTasks(html)

    WriteFile(devIp, "*****" + GetNowTime() + "****" + str(totalTskNum) + "********************\n")
    if nIndx > 0:
        WriteFile(devIp, html)

    while totalTskNum > nIndx:
        url = baseUrl + str(random.uniform(0, 1)) + '&rowIndex=' + str(nIndx)
        WriteFile(devIp, GetHtml(url))
        nIndx += PAGE_STEP

    return totalTskNum


def Main():
    """Prompt for the device IP, then poll it forever, once per interval."""
    devIp = _ReadLine("ip: ")
    print(devIp)
    baseUrl = 'http://' + devIp + '/goform/formIPQAM?type=3&cmd=1&language=1&gbeNo=0&ran='
    while True:
        print(PollOnce(devIp, baseUrl))
        WriteFile(devIp, "*****end****************************************\n")
        time.sleep(POLL_INTERVAL_SECONDS)


# Guarded so that importing this module does not prompt for input or start
# the endless polling loop; running it as a script behaves as before.
if __name__ == "__main__":
    Main()


0 0
原创粉丝点击