筛选出可用的代理+刷百度博客访问量+ip正则表达式

来源：互联网　发布：java instance单例　编辑：程序博客网　时间：2024/05/23 02:24

#!/usr/bin/env python
# coding=utf-8
"""Filter a proxy list down to the entries that pass the myThread check.

The script reads ``ip:port`` candidates from PROXY_FILE, hands them to
``myThread.mThread`` workers through the shared ``myThread.proxy`` queue,
waits until every candidate has been processed, and then overwrites
PROXY_FILE with the proxies the workers placed on ``myThread.newProxy``.
"""
import re

# The same file is used as input (candidates) and output (verified proxies).
PROXY_FILE = "c:\\1.txt"
WORKER_COUNT = 50

# One IPv4 octet, 0-255.  The original last-octet alternative had no
# ``1\d\d`` branch, so addresses ending in .100-.199 were never matched.
_OCTET = r"(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)"

# ``ip:port``.  Ports may have up to five digits (the original ``\d{1,4}``
# truncated them), and the digit look-arounds stop the pattern from matching
# in the middle of a longer run of digits.
PROXY_PATTERN = re.compile(
    r"(?<!\d)(?:" + _OCTET + r"\.){3}" + _OCTET + r":\d{1,5}(?!\d)"
)


def extract_proxies(text):
    """Return every ``ip:port`` string found in *text*, in order of appearance."""
    return PROXY_PATTERN.findall(text)


def proxy_address(entry):
    """Return the ``ip:port`` part of a ``{'http': 'http://ip:port'}`` mapping."""
    return entry['http'].split('//', 1)[1]


def main(path=PROXY_FILE, worker_count=WORKER_COUNT):
    """Check every proxy listed in *path* and rewrite *path* with the good ones.

    path: text file scanned for ``ip:port`` entries and later overwritten.
    worker_count: number of ``myThread.mThread`` workers to start.
    """
    # Project-local module; imported here so the helpers above can be used
    # (and tested) without it.
    import myThread

    # Start from an empty work queue.  Each get() is paired with task_done()
    # so that the join() below is not left waiting on discarded items.
    while not myThread.proxy.empty():
        myThread.proxy.get()
        myThread.proxy.task_done()

    with open(path, "r") as source:
        candidates = extract_proxies(source.read())

    # Queue every candidate for the workers.
    for candidate in candidates:
        myThread.proxy.put(candidate)

    myThread.availableThread = worker_count
    for index in range(myThread.availableThread):
        myThread.mThread(str(index)).start()

    # Blocks until task_done() has been called for every queued candidate.
    myThread.proxy.join()

    working = []
    while not myThread.newProxy.empty():
        working.append(proxy_address(myThread.newProxy.get()))

    # Replace the candidate list with the proxies that passed the check.
    with open(path, "w") as target:
        for address in working:
            target.write(str(address) + "\n")


if __name__ == '__main__':
    main()

#!/usr/bin/env python
# coding=utf-8
"""Worker threads that test HTTP proxies taken from a shared queue.

Written for Python 2 (``Queue``, ``thread``, ``urllib2``, ``cookielib``).
Callers fill ``proxy`` with ``ip:port`` strings, set ``availableThread`` to
the number of workers, start that many ``mThread`` instances and wait on
``proxy.join()``.  Proxies that fetched TEST_URL successfully are put on
``newProxy`` as ``{'http': 'http://ip:port'}`` dicts.
"""
import cookielib
import Queue
import re
import socket
import sys
import thread
import time
import urllib
import urllib2
from threading import Thread

proxy = Queue.Queue()     # candidates still to be checked ("ip:port" strings)
newProxy = Queue.Queue()  # candidates that passed the check (proxy dicts)
count = 0                 # number of candidates taken from ``proxy`` so far
availableThread = 0       # number of workers that have not finished yet

# Page fetched through each candidate to decide whether it works.
TEST_URL = r'http://www.google.com.hk'

# One lock shared by every worker.  The original allocated a new lock per
# thread, which gave no mutual exclusion for the module-level counters.
_state_lock = thread.allocate_lock()


class mThread(Thread):
    """Thread that checks proxies from ``proxy`` until the queue is empty."""

    def __init__(self, threadname):
        """Create a worker named *threadname* (shown in its log output)."""
        Thread.__init__(self)
        self.cookie = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
        # NOTE(review): the check in run() goes through urllib.urlopen, not
        # urllib2, so this opener is presumably unused -- confirm before removing.
        urllib2.install_opener(self.opener)
        self.name = threadname
        # Kept as an attribute for compatibility; now refers to the shared lock.
        self.lock = _state_lock
        # Process-wide default: give up on a connection after 5 seconds.
        socket.setdefaulttimeout(5)

    def run(self):
        """Check candidates one by one; return when ``proxy`` is empty."""
        global count, availableThread
        while True:
            # get_nowait() instead of "qsize() == 0, then get()": with several
            # workers the queue can be emptied between the test and the
            # blocking get(), which would leave this thread waiting forever.
            try:
                sock = proxy.get_nowait()
            except Queue.Empty:
                print("not proxy availabe!")
                with self.lock:
                    availableThread -= 1
                    print("可用线程数 %s" % availableThread)
                return

            try:
                with self.lock:
                    count = count + 1
                    print("已使用代理 %s  可用: %s" % (count, proxy.qsize()))
                self.MYPROXY = {'http': 'http://' + sock}
                try:
                    h = urllib.urlopen(TEST_URL, proxies=self.MYPROXY)
                    try:
                        h.read()
                    finally:
                        h.close()  # release the connection
                except IOError:
                    print("Error %s" % sock)
                except Exception as exc:
                    # Thread boundary: an unexpected error on one proxy must
                    # not kill the worker, or the queue would stop draining.
                    print("Error %s (%r)" % (sock, exc))
                else:
                    print("%s __ %s" % (self.MYPROXY, self.name))
                    newProxy.put(self.MYPROXY)
            finally:
                # Always mark the item as processed so proxy.join() can return.
                proxy.task_done()


if __name__ == '__main__':
    # The original called mThread.start() on the class itself, which raises
    # TypeError.  Start a single worker instead; with an empty queue it
    # reports that there is nothing to check and exits.
    availableThread = 1
    mThread("0").start()

改装后可用于刷百度空间的流量

#!/usr/bin/env python
# coding=utf-8
"""Variant of the myThread module: request one fixed page through each proxy.

Written for Python 2 (``thread``, ``urllib2``, ``cookielib``).  Every
``mThread`` loads the ``ip:port`` list from PROXY_FILE when it is created and
requests TARGET_URL once through each entry when it runs.

NOTE(review): sending automated requests to a third-party site may violate
that site's terms of service -- confirm this is permitted before running.
"""
import cookielib
import re
import socket
import sys
import thread
import time
import urllib
import urllib2
from threading import Thread

count = 0  # number of successful requests, across all threads

PROXY_FILE = "c:\\1.txt"
TARGET_URL = r'http://hi.baidu.com/ckl_soft/item/9971ab21a031f157c28d5977'

# One IPv4 octet, 0-255.  The original last-octet alternative had no
# ``1\d\d`` branch, so addresses ending in .100-.199 were never matched.
_OCTET = r"(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)"

# ``ip:port`` with a port of up to five digits (the original ``\d{1,4}``
# truncated longer ports); the look-arounds prevent matches inside longer
# runs of digits.
PROXY_PATTERN = re.compile(
    r"(?<!\d)(?:" + _OCTET + r"\.){3}" + _OCTET + r":\d{1,5}(?!\d)"
)

# One lock shared by every thread.  The original allocated a new lock per
# thread, which gave no mutual exclusion for ``count``.
_count_lock = thread.allocate_lock()


class mThread(Thread):
    """Thread that requests TARGET_URL once through every listed proxy."""

    def __init__(self, threadname):
        """Create a worker named *threadname* and load the proxy list."""
        Thread.__init__(self)
        self.cookie = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookie))
        # NOTE(review): run() uses urllib.urlopen, not urllib2, so this opener
        # is presumably unused -- confirm before removing.
        urllib2.install_opener(self.opener)
        self.name = threadname
        # Kept as an attribute for compatibility; now refers to the shared lock.
        self.lock = _count_lock
        # Process-wide default: give up on a connection after 5 seconds.
        socket.setdefaulttimeout(5)
        with open(PROXY_FILE, "r") as fp:
            file_content = fp.read()
        self.proxy = PROXY_PATTERN.findall(file_content)

    def run(self):
        """Request TARGET_URL through each proxy and count the successes."""
        global count
        for p in self.proxy:
            self.MYPROXY = {'http': 'http://' + p}
            try:
                h = urllib.urlopen(TARGET_URL, proxies=self.MYPROXY)
                try:
                    h.read()
                finally:
                    h.close()  # release the connection
            except IOError:
                print("没响应 %s" % p)
                continue
            # Read and update the shared counter under the same lock so the
            # printed value and the increment cannot interleave across threads.
            with self.lock:
                print("%s __ %s __ %s" % (self.MYPROXY, self.name, count))
                count = count + 1


if __name__ == '__main__':
    # The original called mThread.start() on the class itself, which raises
    # TypeError; start a single worker instance instead.
    mThread("0").start()

#!/usr/bin/env python
# coding=utf-8
"""Start 50 ``myThread.mThread`` workers, named "0" through "49"."""
import re
import myThread

# Each worker is created and started before the next one is created.
for thread_name in map(str, range(50)):
    worker = myThread.mThread(thread_name)
    worker.start()