[No.003-4]爬取网易赔率数据并导入到MySQL数据库

来源:互联网 发布:尤里盖勒 知乎 编辑:程序博客网 时间:2024/06/07 06:43

收集比赛结果

#encoding:utf-8
# Parse a saved match-results HTML page (local file "20140830") and load the
# per-match results and odds into the MySQL table `results`, then derive the
# `spf` and `zjq` columns from the final score.
#
# NOTE: this script targets Python 2 (urllib2, reload(sys),
# sys.setdefaultencoding).
import sys
import urllib2
import re
from bs4 import BeautifulSoup
import MySQLdb as mdb

# Python 2 only: make implicit str <-> unicode conversions use UTF-8 so that
# mixing parsed (unicode) and encoded (str) values below does not raise.
reload(sys)
sys.setdefaultencoding('utf-8')

# Matches the yyyy-mm-dd style date embedded in the date cells.
DATE_RE = re.compile(r"\w{4}-\w{2}-\w{2}")


def find_in_rows(soup, name, attrs=None):
    """Return every `name` tag matching `attrs` inside each <tr> of `soup`.

    Tags are returned in document order, row by row, exactly as a nested
    ``for tr in soup.findAll("tr"): for tag in tr.findAll(name, attrs)`` loop
    would visit them.
    """
    found = []
    for row in soup.findAll("tr"):
        found.extend(row.findAll(name, attrs or {}))
    return found


# Read the page inside `with` so the file handle is closed after parsing.
with open("20140830") as page:
    soup = BeautifulSoup(page)

# 1. Match number: the width=50 cells come in groups of four (some of them
#    None or unrelated); the fourth cell of each complete group is the number.
width50 = [td.string for td in find_in_rows(soup, "td", {"width": "50"})]
scr = [width50[4 * i + 3] for i in range(len(width50) // 4)]

# 2. League / competition name.
league = [td.string.encode('utf-8')
          for td in find_in_rows(soup, "td", {"width": "70"})]

# 3. Match date: the first three width=61 cells are not match data.
gmdate = [DATE_RE.search(str(td)).group()
          for td in find_in_rows(soup, "td", {"width": "61"})[3:]]

# 4. Teams, flattened as [home0, away0, home1, away1, ...].
team = [a.string.strip().encode('utf-8')
        for a in find_in_rows(soup, "a", {"class": "dui"})]

# 5. Win/draw/lose odds: drop the first and last <span> (not match data);
#    the rest come in groups of seven, of which items 4..6 are the three odds.
spans = [span.string for span in find_in_rows(soup, "span")][1:-1]
spfpl = []
for start in range(4, len(spans), 7):
    spfpl.extend(spans[start:start + 3])

# 6. Final score and the odds paid for that score, flattened as
#    [score0, odds0, score1, odds1, ...].
bfjg = []
for div in find_in_rows(soup, "div", {"align": "center"}):
    for strong in div.findAll("strong"):
        bfjg.append(strong.string.encode('utf-8'))

# 7. Build one row per match, in exactly the column order of the INSERT below.
#    The id is "<date>-<match number>", which is unique per match; the match
#    number itself has no column of its own, so it is NOT added separately
#    (doing so would shift every following value by one column).
rows = []
for i in range(len(scr)):
    rows.append((
        gmdate[i] + '-' + scr[i],   # id
        league[i],                  # lea
        gmdate[i],                  # gmd
        team[2 * i],                # hos
        team[2 * i + 1],            # gue
        spfpl[3 * i],               # win
        spfpl[3 * i + 1],           # dog
        spfpl[3 * i + 2],           # los
        bfjg[2 * i],                # res
        bfjg[2 * i + 1],            # odd
    ))

SQL = ("insert into results(id,lea,gmd,hos,gue,win,dog,los,res,odd) "
       "values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)")

# Derive the win/draw/lose outcome (spf = 3 / 1 / 0) from the final score.
sql_update3="update results set spf=3 where res='1:0' or res='2:0' or res='2:1' or res='3:0' or res='3:1' or res='3:2' or res='4:0' or res='4:1' or res='4:2' or res='5:0' or res='5:1' or res='5:2' or res='胜其他';"
sql_update1="update results set spf=1 where res='0:0' or res='1:1' or res='2:2' or res='3:3' or res='平其他';"
sql_update0="update results set spf=0 where res='0:1' or res='0:2' or res='1:2' or res='0:3' or res='1:3' or res='2:3' or res='0:4' or res='1:4' or res='2:4' or res='0:5' or res='1:5' or res='2:5' or res='负其他';"

# Derive the total-goals bucket (zjq) from the final score.
sql_updatezjq0="update results set zjq=0 where res='0:0'"
sql_updatezjq1="update results set zjq=1 where res='1:0' or res='0:1'"
# Only 2:0, 1:1 and 0:2 total two goals ('0:1' belongs to zjq=1 above).
sql_updatezjq2="update results set zjq=2 where res='2:0' or res='1:1' or res='0:2'"
sql_updatezjq3="update results set zjq=3 where res='3:0' or res='2:1' or res='1:2' or res='0:3'"
sql_updatezjq4="update results set zjq=4 where res='4:0' or res='3:1' or res='2:2' or res='1:3' or res='0:4'"
sql_updatezjq5="update results set zjq=5 where res='5:0' or res='4:1' or res='3:2' or res='2:3' or res='0:5' or res='1:4'"
sql_updatezjq6="update results set zjq=6 where res='5:1' or res='3:3' or res='4:2' or res='2:4' or res='1:5'"
# NOTE(review): '平其他' is not assigned any zjq bucket here - confirm whether
# it should be included in zjq=7.
sql_updatezjq7="update results set zjq=7 where res='胜其他' or res='负其他'"

derived_updates = (
    sql_update3, sql_update1, sql_update0,
    sql_updatezjq0, sql_updatezjq1, sql_updatezjq2, sql_updatezjq3,
    sql_updatezjq4, sql_updatezjq5, sql_updatezjq6, sql_updatezjq7,
)

conn = mdb.connect(host='localhost', user='root', passwd='oracle', db='betdb', port=3306)
try:
    cur = conn.cursor()
    for row in rows:
        cur.execute(SQL, row)
    for statement in derived_updates:
        cur.execute(statement)
    # The connection does not autocommit by default; without this the inserts
    # and updates are discarded on a transactional storage engine.
    conn.commit()
finally:
    conn.close()
0 0