socket 查询百度

来源:互联网 发布:淘宝哪里进货 编辑:程序博客网 时间:2024/05/19 06:37
# -*- coding: utf-8 -*-
# querybaidu.py
'''
Send HTTP requests to Baidu over a raw TCP socket.
After the connection succeeds, send HTTP GET requests to use the search feature.
'''

import socket
import sys
import time
# Address of the Baidu front-end server the script connects to.
BAIDU_ADDR = ('220.181.111.148', 80)
# Maximum number of bytes requested from the socket per recv() call.
RECV_SIZE = 1024


def build_request(path):
    '''Return the raw bytes of a keep-alive HTTP/1.1 GET request for *path*.

    *path* must already be URL-safe ASCII (see build_search_path). Every
    header line is terminated with CRLF, as HTTP requires.
    '''
    request = ('GET ' + path + ' HTTP/1.1\r\n'
               'Host:www.baidu.com\r\n'
               'Connection:keep-alive\r\n'
               '\r\n')
    return request.encode('ascii')


def build_search_path(word):
    '''Return the request path of a Baidu search for *word*.

    The word is percent-encoded so that spaces, non-ASCII text and CR/LF
    characters cannot corrupt (or inject headers into) the request.
    '''
    try:
        from urllib.parse import quote  # Python 3
    except ImportError:
        from urllib import quote  # Python 2
    return '/s?wd=' + quote(word, safe='')


def split_response(data):
    '''Split raw response bytes into (header, body_start).

    Returns None while the blank line that ends the header block has not
    been received yet.
    '''
    header, separator, body = data.partition(b'\r\n\r\n')
    if not separator:
        return None
    return header, body


def parse_content_length(header):
    '''Return the Content-Length announced in *header*, or None.

    The header name is matched case-insensitively and independently of
    where it appears among the other headers. None is returned when the
    header is missing, not a number, or negative.
    '''
    # The first line is the status line, not a header field.
    for line in header.split(b'\r\n')[1:]:
        name, separator, value = line.partition(b':')
        if separator and name.strip().lower() == b'content-length':
            try:
                length = int(value.strip())
            except ValueError:
                return None
            return length if length >= 0 else None
    return None


def recv_header(sock):
    '''Receive from *sock* until the whole HTTP header block has arrived.

    Returns (header, body_start), where body_start holds any body bytes
    that arrived together with the header, or None if the peer closed the
    connection before the header was complete.
    '''
    data = b''
    while True:
        parts = split_response(data)
        if parts is not None:
            return parts
        chunk = sock.recv(RECV_SIZE)
        if not chunk:
            return None
        data += chunk


def save_body(sock, body, length, filename):
    '''Write exactly *length* bytes of response body to *filename*.

    *body* is the part of the body already received with the header; the
    rest is read from *sock*. Returns True when the full body was written
    and False if the peer closed the connection early.
    '''
    # Binary mode: the payload is written exactly as received.
    with open(filename, 'wb') as fp:
        fp.write(body[:length])
        received = min(len(body), length)
        while received < length:
            # Never ask for more than what is left of this response, so the
            # next response on the kept-alive connection is not consumed.
            chunk = sock.recv(min(RECV_SIZE, length - received))
            if not chunk:
                return False
            fp.write(chunk)
            received += len(chunk)
    return True


def read_word(prompt):
    '''Prompt the user on the terminal and return the line they typed.'''
    try:
        reader = raw_input  # Python 2
    except NameError:
        reader = input  # Python 3
    return reader(prompt)


def main():
    '''Fetch the Baidu home page, then run searches typed by the user.

    Each response body is saved to '<word>.html' ('baidu.html' for the
    home page). The loop ends when the server closes the connection, when
    a response carries no usable Content-Length, when sending fails, or
    when the user sends end-of-file at the prompt.
    '''
    # Create the socket.
    try:
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    except socket.error as e:
        print('socket false:%s' % e)
        return
    print('socket ...')

    try:
        # Connect to the Baidu server.
        try:
            sock.connect(BAIDU_ADDR)
        except socket.error as e:
            print('connect false %s' % e)
            return
        print('connect ...')

        # First request: the home page, on a connection that is kept open.
        print('send start...')
        request = build_request('/')
        word = 'baidu'
        while True:
            try:
                # sendall() retries until the whole request is written.
                sock.sendall(request)
            except socket.error:
                print('send false')
                return

            # The Content-Length header tells how many body bytes follow;
            # it is the only way used here to know a response is complete.
            response = recv_header(sock)
            length = None
            if response is not None:
                header, body = response
                print(header)
                length = parse_content_length(header)
            if length is None:
                print('connecting closed')
                break
            print(length)

            if not save_body(sock, body, length, word + '.html'):
                print('connecting closed')
                break

            try:
                word = read_word('please input your word----->')
            except EOFError:
                break
            request = build_request(build_search_path(word))
            print(request)
    finally:
        # Release the socket on every exit path, including errors.
        sock.close()


if __name__ == '__main__':
    main()