Python获取高德地图POI

来源:互联网 发布:2016网络新兴词汇 编辑:程序博客网 时间:2024/04/29 12:57
# -*- coding: utf-8 -*-
"""Download AMap (Gaode) POI search results for the cities in an Excel sheet.

Each row of the first worksheet of the input workbook supplies an owner
(column 0) and a city (column 1).  For every row the script pages through
the AMap ``place/text`` XML API, writes the returned POIs to a worksheet
named after the city, and records the reported total on the ``All Data``
summary sheet.  The finished workbook is saved as ``result.xls`` inside
``inforst``.
"""
import os
import urllib.parse
import urllib.request
import xml.dom.minidom as minidom
from tempfile import TemporaryFile

import xlrd
import xlwt
from xlwt import Workbook

# Result directory.
inforst = '/home/yuhz/PycharmProjects/untitled2/POI/'
data = xlrd.open_workbook('/home/yuhz/PycharmProjects/untitled2/POI/city1.xls')
sheet = data.sheets()[0]
file_name = 'result.txt'  # legacy text-result path; nothing writes to it
# Sample request kept from the original script; main() builds its own URLs.
# Original author's notes: types=170300 -> factory facilities,
# city=120113 -> Beichen, Tianjin.
url_amap = 'http://restapi.amap.com/v3/place/text?&keywords=&types=010800&city=370602&citylimit=true&&output=xml&offset=20&page=1&key=&extensions=base'
amap_key = ''  # AMap web-service key; blank in the published script
each_page_rec = 20  # results requested per page (sent as ``offset``)
which_pach = r'page=1'  # display which page
xml_file = 'tmp.xml'  # scratch file holding the most recent XML response

# Output workbook and the detail worksheet currently being filled.
book = Workbook()
sheet1 = None  # assigned by main() before parseXML() is called
style = xlwt.easyxf('align: wrap on')  # wrap long cell text

# XML tags copied from each <poi>, in output-column order.
POI_FIELDS = ('name', 'address', 'tel', 'pname', 'cityname', 'adname',
              'location')
DETAIL_HEADERS = ('CORPNAME', 'ADDRESS', 'TEL', 'pname', 'cityname', 'adname',
                  'location')
DETAIL_WIDTHS = (10000, 10000, 10000, 5000, 5000, 5000, 5000)
SUMMARY_HEADERS = ('Owner', 'city', 'count')
SUMMARY_WIDTHS = (10000, 10000, 10000, 5000)


def getHtml(url):
    """Download ``url`` and store the raw response body in ``xml_file``.

    Returns 0 on success and -1 when the download or the file write fails
    (``urllib.error.URLError`` is an ``OSError`` subclass, so network
    failures take the same path as local I/O errors).
    """
    try:
        with urllib.request.urlopen(url) as page:
            html = page.read()
        with open(xml_file, 'wb') as xml_file_handle:
            xml_file_handle.write(html)
    except OSError as err:
        print("IO error: " + str(err))
        return -1
    return 0


def _first_text(node, tag):
    """Return the text of the first ``tag`` descendant of ``node``.

    Returns '' when the element is missing or has no child nodes.
    """
    try:
        return node.getElementsByTagName(tag)[0].childNodes[0].nodeValue
    except IndexError:
        return ""


def parseXML(index):
    """Parse ``xml_file`` and append its POIs to the global ``sheet1``.

    Rows are written starting at ``index + 1`` using ``style``.

    Returns the text of the response's ``<count>`` element ("0" when the
    element is missing or empty, e.g. on an API error response).  The
    initial value 1 is returned when the file cannot be read or contains
    no ``<response>`` element, as in the original script.
    """
    total_rec = 1  # record number
    try:
        dom = minidom.parse(xml_file)
    except IOError as err:
        print("IO error: " + str(err))
        return total_rec

    for node in dom.getElementsByTagName("response"):
        total_rec = _first_text(node, "count") or "0"
        pois = node.getElementsByTagName("pois")
        if not pois:
            # Error responses carry no <pois> element; nothing to write.
            continue
        for poi in pois[0].getElementsByTagName('poi'):
            index += 1
            row = sheet1.row(index)
            for col, tag in enumerate(POI_FIELDS):
                row.write(col, _first_text(poi, tag), style)
    return total_rec


def _cell_text(value):
    """Return an xlrd cell value as text, without '.0' on whole numbers."""
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _build_url(city, page):
    """Return the place/text request URL for ``city`` and result ``page``."""
    return ('http://restapi.amap.com/v3/place/text?&keywords=&types=010800&city='
            + urllib.parse.quote(city)
            + '&citylimit=true&&output=xml&offset=' + str(each_page_rec)
            + '&page=' + str(page)
            + '&key=' + amap_key + '&extensions=base')


def _write_header(worksheet, titles, widths):
    """Write ``titles`` into row 0 of ``worksheet`` and set column widths."""
    header = worksheet.row(0)
    for col, title in enumerate(titles):
        header.write(col, title)
    for col, width in enumerate(widths):
        worksheet.col(col).width = width


def _fetch_city(city):
    """Fetch every result page for ``city`` into the global ``sheet1``.

    Returns the total reported by page 1, or 0 when page 1 could not be
    downloaded.
    """
    if getHtml(_build_url(city, 1)) != 0:
        print('error: fail to get xml from amap')
        return 0

    print('parsing page 1 ... ...')
    index = 0
    # The first page reports the total record number.
    total_record = int(parseXML(index))
    page_total = (total_record + each_page_rec - 1) // each_page_rec

    # Retrieve the remaining pages.
    for each_page in range(2, page_total + 1):
        index += each_page_rec
        print('parsing page ' + str(each_page) + ' ... ...')
        if getHtml(_build_url(city, each_page)) != 0:
            # Do not re-parse the stale XML left by the previous page.
            print('error: fail to get xml from amap')
            break
        if int(parseXML(index)) == 0:
            break
    return total_record


def main():
    """Process every input row and save the result workbook."""
    global sheet1

    summary = book.add_sheet('All Data')
    _write_header(summary, SUMMARY_HEADERS, SUMMARY_WIDTHS)

    for i in range(sheet.nrows):
        cells = sheet.row(i)
        owner = cells[0].value
        keywords = _cell_text(cells[1].value)

        sheet1 = book.add_sheet(keywords)
        _write_header(sheet1, DETAIL_HEADERS, DETAIL_WIDTHS)

        total_record = _fetch_city(keywords)

        # Summary row for this city.
        row2 = summary.row(i + 1)
        row2.write(0, owner)
        row2.write(1, keywords)
        row2.write(2, total_record)

    book.save(os.path.join(inforst, 'result.xls'))
    with TemporaryFile() as tmp:
        book.save(tmp)


if __name__ == '__main__':
    main()

version 1.0:支持跨 sheet 页获取数据,并且按照原 sheet 页顺序写入。

# -*- coding: utf-8 -*-
"""Download AMap (Gaode) POI search results for every sheet of a workbook.

Each row of each input worksheet supplies an owner (column 0) and a city
(column 1).  For every row the script pages through the AMap
``place/text`` XML API and appends the returned POIs, tagged with the
owner, to an output worksheet named after the input worksheet.  One line
per input row (owner, city, reported total, worksheet name) is added to
the ``All Data`` summary sheet.  The finished workbook is saved as
``result.xls`` inside ``inforst``.
"""
import os
import urllib.parse
import urllib.request
import xml.dom.minidom as minidom
from tempfile import TemporaryFile

import xlrd
import xlwt
from xlwt import Workbook

# Result directory.
inforst = 'C:/Users/玲玲/PycharmProjects/untitled/POI'
data = xlrd.open_workbook('C:/Users/玲玲/PycharmProjects/untitled/POI/city.xls')
file_name = 'result.txt'  # legacy text-result path; nothing writes to it
# Sample request kept from the original script; main() builds its own URLs.
# Original author's notes: types=170300 -> factory facilities,
# city=120113 -> Beichen, Tianjin.
url_amap = 'http://restapi.amap.com/v3/place/text?&keywords=&types=010800&city=370602&citylimit=true&&output=xml&offset=20&page=1&key=¥¥&extensions=base'
amap_key = '*****'  # AMap web-service key; masked in the published script
each_page_rec = 20  # results requested per page (sent as ``offset``)
which_pach = r'page=1'  # display which page
xml_file = 'tmp.xml'  # scratch file holding the most recent XML response
# Set to True when the owner column uses merged cells: rows with a blank
# owner then inherit the most recent non-blank owner above them.
owner_cells_merged = False

# Output workbook, the detail worksheet currently being filled, and the
# index of the last row written to it.
book = Workbook()
all_index = 0
sheet1 = None  # assigned by main() before parseXML() is called
style = xlwt.easyxf('align: wrap on')  # wrap long cell text

# XML tags copied from each <poi>, in output-column order.
POI_FIELDS = ('name', 'address', 'tel', 'pname', 'cityname', 'adname',
              'location')
DETAIL_HEADERS = ('CORPNAME', 'ADDRESS', 'TEL', 'pname', 'cityname', 'adname',
                  'location', 'owner')
DETAIL_WIDTHS = (10000, 10000, 10000, 5000, 5000, 5000, 5000, 2000)
SUMMARY_HEADERS = ('Owner', 'city', 'count', 'area')
SUMMARY_WIDTHS = (10000, 10000, 10000, 5000)


def getHtml(url):
    """Download ``url`` and store the raw response body in ``xml_file``.

    Returns 0 on success and -1 when the download or the file write fails
    (``urllib.error.URLError`` is an ``OSError`` subclass, so network
    failures take the same path as local I/O errors).
    """
    try:
        with urllib.request.urlopen(url) as page:
            html = page.read()
        with open(xml_file, 'wb') as xml_file_handle:
            xml_file_handle.write(html)
    except OSError as err:
        print("IO error: " + str(err))
        return -1
    return 0


def _first_text(node, tag):
    """Return the text of the first ``tag`` descendant of ``node``.

    Returns '' when the element is missing or has no child nodes.
    """
    try:
        return node.getElementsByTagName(tag)[0].childNodes[0].nodeValue
    except IndexError:
        return ""


def parseXML(owner):
    """Parse ``xml_file`` and append its POIs to the global ``sheet1``.

    Each POI is written to the row after the global ``all_index`` (which
    is advanced), with ``owner`` in the column after the POI fields.

    Returns the text of the response's ``<count>`` element ("0" when the
    element is missing or empty, e.g. on an API error response).  The
    initial value 1 is returned when the file cannot be read or contains
    no ``<response>`` element, as in the original script.
    """
    global all_index

    total_rec = 1  # record number
    try:
        dom = minidom.parse(xml_file)
    except IOError as err:
        print("IO error: " + str(err))
        return total_rec

    for node in dom.getElementsByTagName("response"):
        total_rec = _first_text(node, "count") or "0"
        pois = node.getElementsByTagName("pois")
        if not pois:
            # Error responses carry no <pois> element; nothing to write.
            continue
        for poi in pois[0].getElementsByTagName('poi'):
            all_index += 1
            row = sheet1.row(all_index)
            for col, tag in enumerate(POI_FIELDS):
                row.write(col, _first_text(poi, tag), style)
            row.write(len(POI_FIELDS), owner, style)
    return total_rec


def _cell_text(value):
    """Return an xlrd cell value as text, without '.0' on whole numbers."""
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _build_url(city, page):
    """Return the place/text request URL for ``city`` and result ``page``."""
    return ('http://restapi.amap.com/v3/place/text?&keywords=&types=010800&city='
            + urllib.parse.quote(city)
            + '&citylimit=true&&output=xml&offset=' + str(each_page_rec)
            + '&page=' + str(page)
            + '&key=' + amap_key + '&extensions=base')


def _write_header(worksheet, titles, widths):
    """Write ``titles`` into row 0 of ``worksheet`` and set column widths."""
    header = worksheet.row(0)
    for col, title in enumerate(titles):
        header.write(col, title)
    for col, width in enumerate(widths):
        worksheet.col(col).width = width


def _fetch_city(city, owner):
    """Fetch every result page for ``city`` into the global ``sheet1``.

    Returns the total reported by page 1, or 0 when page 1 could not be
    downloaded.
    """
    if getHtml(_build_url(city, 1)) != 0:
        print('error: fail to get xml from amap')
        return 0

    print('parsing page 1 ... ...')
    # The first page reports the total record number.
    total_record = int(parseXML(owner))
    page_total = (total_record + each_page_rec - 1) // each_page_rec

    # Retrieve the remaining pages.
    for each_page in range(2, page_total + 1):
        print('parsing page ' + str(each_page) + ' ... ...')
        if getHtml(_build_url(city, each_page)) != 0:
            # Do not re-parse the stale XML left by the previous page.
            print('error: fail to get xml from amap')
            break
        if int(parseXML(owner)) == 0:
            break
    return total_record


def main():
    """Process every row of every input worksheet and save the workbook."""
    global sheet1, all_index

    summary = book.add_sheet('All Data')
    _write_header(summary, SUMMARY_HEADERS, SUMMARY_WIDTHS)
    summary_index = 1

    # Walk the input worksheets in their original order.
    for worksheet_name in data.sheet_names():
        sheet = data.sheet_by_name(worksheet_name)

        sheet1 = book.add_sheet(worksheet_name)
        _write_header(sheet1, DETAIL_HEADERS, DETAIL_WIDTHS)
        all_index = 0  # detail rows restart below the header on each sheet

        last_owner = ''
        for i in range(sheet.nrows):
            cells = sheet.row(i)
            owner = cells[0].value
            keywords = _cell_text(cells[1].value)

            if str(owner).strip():
                last_owner = owner
            elif owner_cells_merged:
                # Blank owner cell in merged mode: reuse the owner above.
                owner = last_owner

            total_record = _fetch_city(keywords, owner)

            # Summary row for this city.
            row2 = summary.row(summary_index)
            row2.write(0, owner)
            row2.write(1, keywords)
            row2.write(2, total_record)
            row2.write(3, worksheet_name)
            summary_index += 1

    # os.path.join supplies the separator that ``inforst`` lacks.
    book.save(os.path.join(inforst, 'result.xls'))
    with TemporaryFile() as tmp:
        book.save(tmp)


if __name__ == '__main__':
    main()
0 0
原创粉丝点击