AAuto - 抓取网页新闻

来源:互联网 发布:c语言的函数 编辑:程序博客网 时间:2024/04/27 21:51


import win.ui;
import web.form;

/*
Console news poller.

Loads the Sina Finance rolling-news page in a hidden embedded browser form,
and every 10 seconds refreshes it, extracts up to 10 headlines from the
element with id "d_list", and prints them to the console.
*/

io.open(); // open the console window used by io.print

// Host form for the browser control; it is created but kept hidden.
winform2 = win.form( bottom=1023;parent=...;text="抓新闻";right=1279;scroll=1 )
wb2 = web.form( winform2 );

// The original address had a second URL ("http://roll.finance.qq.com/#")
// pasted directly after "page=1"; it has been removed so the page
// parameter is a plain number.
wb2.go("http://roll.finance.sina.com.cn/s/channel.php?ch=03#col=43&spec=&type=&ch=03&k=&offset_page=0&offset_num=0&num=60&asc=&page=1");
winform2.show(false);
wb2.wait("");

// Headlines collected by the current polling round. Shared between main()
// and 新浪财经(), so it is intentionally not declared with var.
news = {};

/*
Polling loop: prints a timestamp, collects the headlines, prints each one as
"time  title source", clears the list and sleeps for 10 seconds.
The loop has no exit condition.
*/
function main(){
    news = {};
    while(true){
        io.print("新闻"+tostring(time.now()));
        新浪财经();

        for(i=1;#news){
            io.print(tostring(news[i].时间)+"  "+news[i].标题+" "+news[i].来源);
        }

        news = {};
        sleep(10000);
    }
}

/*
Refreshes the Sina page and appends up to 10 entries to the shared `news`
table. Each entry is a table with the keys 时间 (time object formatted as
HH:MM), 标题 (title with "(...)" parts removed) and 来源 (always "新浪").
Returns without adding anything when the "d_list" element or its HTML is
not available.
*/
function 新浪财经(){
    io.print("正在登陆新浪");
    wb2.refresh();
    wb2.wait("",10000);
    io.print("登陆完成");

    var ele = wb2.getEle("d_list");
    if(!ele) return;

    var innerHTML = tostring(ele.innerHTML);
    if(!innerHTML) return;

    // One chunk per list item.
    var items = string.split(innerHTML,"</LI>");
    var count = 0;

    for(i=1;#items){
        var item = tostring(items[i]);

        // Section label, i.e. the link text inside the leading "[...]".
        var section = string.match(item,"\[\<A href.+?\>(.+?)\</A\>]");

        // Skip items without a section label and the two unwanted sections.
        // A stricter filter (section name containing "经", "财" or "股")
        // existed in the original as disabled code.
        if(section and section!="生活" and section!="军事"){
            // NOTE(review): if the c_time span is missing, time() receives
            // null here; confirm how that case should be handled.
            var stamp = string.match(item,"\<SPAN class=c_time.+?\>(.+?)\</SPAN>");
            stamp = time(stamp,"%m-%d %H:%M");
            stamp.format = "%H:%M";

            // Drop everything up to the end of the section label so the
            // next link found is the headline link.
            var p,q = string.find(item,section);
            if(!q) continue;
            var rest = string.sub(item,q,#item);

            var title = string.match(rest,"\<A href.+?\>(.+?)\</A\>");
            if(!title) continue; // no headline link: nothing to report

            table.push(news,{
                时间=stamp;
                标题=string.replace(title,"\((.+?\))","");
                来源="新浪"
            });

            count++;
            if(count>=10) break;
        }
    }

    io.print("读取了"+count+"条新闻");
}

main();

// main() loops forever, so the two statements below are only reached if
// that loop is ever given an exit.
execute("pause");
io.close();