code1
来源:互联网 发布:php 手机回收网站源码 编辑:程序博客网 时间:2024/05/02 04:32
##!/usr/bin/env python
#-*- coding: utf8 -*-
from urllib2 import Request,urlopen,URLError
import urllib2
#import http.cookiejar
from BeautifulSoup import BeautifulSoup
#from mysql.common import MySQLCurd
#import MySQLdb
import sys
#import getContent
# Keep references to the current standard streams before reloading ``sys``.
stdout = sys.stdout
stdin = sys.stdin
stderr = sys.stderr
# NOTE(review): presumably reloaded so that ``sys.setdefaultencoding`` is
# callable again (a Python 2 idiom) -- confirm against the target interpreter.
reload (sys)
# Put back the stream objects saved above.
sys.stdout = stdout
sys.stdin = stdin
sys.stderr = stderr
# Set the interpreter's default string encoding to UTF-8.
sys.setdefaultencoding('utf-8')
def do_list():
    """Parse the saved listing page ``test.html`` and dump its first column.

    Reads ``test.html`` from the current working directory, parses it with
    BeautifulSoup and, for every ``<tr>`` of the first ``<tbody>`` except the
    first one, prints the first ``<td>`` and its child nodes.  The raw text,
    the parsed soup and the row list are echoed to stdout as debug output.

    Returns:
        None.  If the page has no ``<tbody>`` the function returns after
        printing the raw text and the parsed soup.

    Raises:
        IOError: if ``test.html`` cannot be opened or read.
    """
    # The live fetch of http://bbs.tianya.cn/list-16-1.shtml is disabled;
    # a local snapshot of the page is parsed instead.
    with open('test.html') as file_object:
        all_the_text = file_object.read()
    print(all_the_text)

    datasoup = BeautifulSoup(all_the_text)
    print("datasoup=====")
    print(datasoup)

    tbody = datasoup.find('tbody')
    if tbody is None:
        # find() returns None when the tag is absent; without this guard the
        # chained findAll() call raised AttributeError.
        return

    # All <tr> rows of the listing.
    list_soup = tbody.findAll('tr')
    print("list_soup========")
    print(list_soup)

    # The first row is always skipped (presumably the table header -- confirm
    # against the saved page).
    for item in list_soup[1:]:
        item_td = item.findAll('td')
        if not item_td:
            continue
        # Only the first cell of each row is inspected.
        td = item_td[0]
        print("td=======")
        print(td)
        # ``.contents`` is the list of child nodes.  The previous ``.content``
        # was treated by BeautifulSoup as a search for a <content> child tag
        # and therefore printed None.
        print(td.contents)

    # TODO: earlier revisions carried disabled code that extracted
    # title/href, author, hits, reply count and time from cells 0-4, wrote
    # them to the ``list`` database table and followed the "next page" link.
    # Re-introduce that logic here once the per-cell parsing is settled.
#-*- coding: utf8 -*-
from urllib2 import Request,urlopen,URLError
import urllib2
#import http.cookiejar
from BeautifulSoup import BeautifulSoup
#from mysql.common import MySQLCurd
#import MySQLdb
import sys
#import getContent
# Keep references to the current standard streams before reloading ``sys``.
stdout = sys.stdout
stdin = sys.stdin
stderr = sys.stderr
# NOTE(review): presumably reloaded so that ``sys.setdefaultencoding`` is
# callable again (a Python 2 idiom) -- confirm against the target interpreter.
reload (sys)
# Put back the stream objects saved above.
sys.stdout = stdout
sys.stdin = stdin
sys.stderr = stderr
# Set the interpreter's default string encoding to UTF-8.
sys.setdefaultencoding('utf-8')
def do_list():
    """Parse the saved listing page ``test.html`` and dump its first column.

    Reads ``test.html`` from the current working directory, parses it with
    BeautifulSoup and, for every ``<tr>`` of the first ``<tbody>`` except the
    first one, prints the first ``<td>`` and its child nodes.  The raw text,
    the parsed soup and the row list are echoed to stdout as debug output.

    Returns:
        None.  If the page has no ``<tbody>`` the function returns after
        printing the raw text and the parsed soup.

    Raises:
        IOError: if ``test.html`` cannot be opened or read.
    """
    # The live fetch of http://bbs.tianya.cn/list-16-1.shtml is disabled;
    # a local snapshot of the page is parsed instead.
    with open('test.html') as file_object:
        all_the_text = file_object.read()
    print(all_the_text)

    datasoup = BeautifulSoup(all_the_text)
    print("datasoup=====")
    print(datasoup)

    tbody = datasoup.find('tbody')
    if tbody is None:
        # find() returns None when the tag is absent; without this guard the
        # chained findAll() call raised AttributeError.
        return

    # All <tr> rows of the listing.
    list_soup = tbody.findAll('tr')
    print("list_soup========")
    print(list_soup)

    # The first row is always skipped (presumably the table header -- confirm
    # against the saved page).
    for item in list_soup[1:]:
        item_td = item.findAll('td')
        if not item_td:
            continue
        # Only the first cell of each row is inspected.
        td = item_td[0]
        print("td=======")
        print(td)
        # ``.contents`` is the list of child nodes.  The previous ``.content``
        # was treated by BeautifulSoup as a search for a <content> child tag
        # and therefore printed None.
        print(td.contents)

    # TODO: earlier revisions carried disabled code that extracted
    # title/href, author, hits, reply count and time from cells 0-4, wrote
    # them to the ``list`` database table and followed the "next page" link.
    # Re-introduce that logic here once the per-cell parsing is settled.
# Script entry: parse the saved listing page when this file is run.
do_list()
-------------------------------------------
-- Schema for the `list` table.
-- Fixes relative to the earlier draft:
--   * added the missing comma between the last column and the primary key;
--   * `desc` is a reserved word in MySQL and must be backtick-quoted;
--   * CHAR is limited to 255 characters in MySQL, so the long text columns
--     are declared VARCHAR instead.
create table list(
    id integer NOT NULL AUTO_INCREMENT,
    cert integer,
    vendor varchar(1024),
    lab varchar(1024),
    module varchar(2048),
    type char(100),
    date char(20),
    `desc` text,
    primary key(id)
);
0 0
- code1
- java zip Code1
- 挖掘Google code1
- css jquery small code1
- CUDA-Code1-HelloWorld
- spring boot使用mongo:code1
- Cocos code1.2下载地址
- Code1 将字符串转换为byte数组
- 进程通信-共享内存-mmap()-code1
- PHP 中单引号和双引号区别: Code1
- Ex2:改写Canny算法(只用CImg库)【code1】
- vs code1.14 python配置anaconda(numpy包等)
- iPhoneOS.platform/Developer/usr/bin/llvm-gcc-4.2 failed with exit code1
- pod的SDK报错,Linker command failed with exit code1(use -v to see invocation)
- VS Code1.4 搭建Golang的开发调试环境(遇到很多问题)
- 【Ubuntu 16】启动Eclipse Indigo报错 error code1 jdk没有配置好
- 解决:mac visual code1.8.1 运行时提示:无法在 PATH 上找到运行时”node“。
- [深入理解Java虚拟机]第八章 字节码执行引擎-基于栈的字节码解释执行引擎
- WM5.0调用widows的Media Player及播放mp3的方法
- Python3 socket编程,并与多线程实现最简单的聊天工具之一
- Windows Media Player控件的所有属性和方法
- 详解IMapControl2、IMapControl3、 IMapControl4不同
- code1
- 2015互联网年,移动办公对企业发展到底有多大影响?
- WindowsMediaPlayer 接口详解
- OC基础第二章:面向对象
- linux下设置了SSH免密码登录但还是需要输入密码的解决办法
- Geometry Shader Concepts & Examples
- hadoop集群搭建——轻松版
- Java内存分析一
- nagios4.1.1 + pnp4nagios 0.6.25 的集成 生成图表