code1

来源：互联网发布：php 手机回收网站源码编辑：程序博客网时间：2024/05/02 04:32

#!/usr/bin/env python
#-*- coding: utf8 -*-

from urllib2 import Request,urlopen,URLError
import urllib2
#import http.cookiejar
from BeautifulSoup import BeautifulSoup
#from mysql.common import MySQLCurd
#import MySQLdb
import sys
#import getContent
# Python 2 idiom: reload(sys) below re-initialises the sys module, so keep
# references to the current standard streams in order to put them back.
stdout = sys.stdout
stdin = sys.stdin
stderr = sys.stderr

# Reloading sys makes sys.setdefaultencoding available again (the site module
# removes it from sys during interpreter start-up).
reload (sys)
# Restore the streams captured above, which the reload may have replaced.
sys.stdout = stdout
sys.stdin = stdin
sys.stderr = stderr

# Make UTF-8 the process-wide default for implicit str <-> unicode conversion.
sys.setdefaultencoding('utf-8')

def do_list(path='test.html'):
    """Parse a saved listing page and dump the first cell of each data row.

    Reads the HTML file at ``path``, parses it with BeautifulSoup, locates the
    first ``<tbody>`` and collects its ``<tr>`` rows. The first row is skipped
    (presumably a header row -- confirm against the page being scraped); for
    every remaining row that contains ``<td>`` cells, the first cell and its
    child nodes are printed. The raw text, the parsed soup and the row list
    are printed as well, as debugging output.

    Args:
        path: HTML file to read. Defaults to ``'test.html'``, the file name
            the script originally hard-coded.

    Returns:
        None. All output goes to stdout.
    """
    # Original target, kept for reference:
    # url = 'http://bbs.tianya.cn/list-16-1.shtml'

    # `with` closes the file even if read() raises.
    with open(path) as file_object:
        all_the_text = file_object.read()

    # Single-argument print(...) behaves exactly like the Python 2 print
    # statement, so the debugging output is unchanged.
    print(all_the_text)

    datasoup = BeautifulSoup(all_the_text)
    print("datasoup=====")
    print(datasoup)

    tbody = datasoup.find('tbody')
    if tbody is None:
        # No table body in the page: there is nothing to list, so stop here
        # instead of failing on None.findAll.
        return

    # Every <tr> of the listing table.
    list_soup = tbody.findAll('tr')
    print("list_soup========")
    print(list_soup)

    # The first row is skipped, as in the original counter-based loop.
    for item in list_soup[1:]:
        item_td = item.findAll('td')
        if not item_td:
            continue

        # Only the first cell is inspected for now.
        td = item_td[0]
        print("td=======")
        print(td)
        # `.contents` is the tag's list of child nodes; the former
        # `td.content` was looked up as a child tag named "content" and
        # therefore printed None.
        print(td.contents)

    # TODO: the original script carried commented-out code for the rest of the
    # pipeline, which was never enabled: per-column parsing (title and link,
    # author link, hits, replies, time), an insert-or-update of each row into
    # the `list` table, and following the "next page" link of the listing.

# Run the listing dump as soon as this file is executed; because the call is
# unguarded it also runs whenever the module is imported.
do_list()

-------------------------------------------

-- MySQL DDL for the `list` table (AUTO_INCREMENT is MySQL syntax).
create table list(
    id integer NOT NULL AUTO_INCREMENT,
    cert integer,
    -- CHAR is limited to 255 characters in MySQL, so the long text columns
    -- are declared as VARCHAR with the same maximum lengths.
    vendor varchar(1024),
    lab varchar(1024),
    module varchar(2048),
    type char(100),
    date char(20),
    -- `desc` is a reserved word (ORDER BY ... DESC) and must be quoted; the
    -- comma that was missing after this column is restored.
    `desc` text,
    primary key(id)
);

0 0