美文网首页
爬取不同分辨率下的不同地图图片数据

爬取不同分辨率下的不同地图图片数据

作者: 左手一份执 | 来源:发表于2017-07-14 13:04 被阅读0次

爬取不同分辨率下的不同地图图片数据

from math import *
import urllib
import urllib2
import requests
import os
url_list = []  # kept for backward compatibility; create_url now returns a fresh list
# Generate tile URLs, e.g. 8/0/0.png, 8/0/1.png ... 8/255/255.png
def create_url(first, second):
    """Return the list of tile paths '<first>/<y>/<z>.png'.

    Both y and z range over [0, 2**second), so the result has
    (2**second)**2 entries.

    Bug fix: the original appended into the shared module-level
    `url_list`, so calling it twice accumulated duplicate entries.
    A fresh local list is built and returned instead.
    """
    tiles = []
    dim = int(pow(2, second))
    for y in range(dim):
        for z in range(dim):
            name = str(first) + '/' + str(y) + '/' + str(z) + '.png'
            tiles.append(name)
            print(name)  # progress trace; single-arg form is valid in Python 2 and 3
    return tiles

# Create the directory tree matching each tile path
def create_dirs(url_list, base_filepath):
    """Create '<base_filepath><zoom>/<x>/' for every tile path in url_list.

    Each entry looks like '8/12/34.png'; only the first two path
    components (zoom and column) become directories.  Existing
    directories are left untouched, so the call is idempotent.

    Bug fix: the Python-2-only `print` statement is replaced with the
    cross-version `print(...)` call.
    """
    for tile in url_list:
        parts = tile.split('/')
        file_path = base_filepath + parts[0] + '/' + parts[1] + '/'
        if not os.path.exists(file_path):
            print(file_path)  # progress trace
            os.makedirs(file_path)


base_url = 'http://a.tile.openstreetmap.org/'
# Tile image download
def download_png(url_list, filepath):
    """Download every tile in url_list from base_url into filepath.

    The target directories must already exist (see create_dirs).

    Bug fix: the original ignored the `filepath` parameter and always
    wrote to the hard-coded 'd:/test/' prefix; it now honors the
    argument.  Dead commented-out code was removed.
    """
    for tile in url_list:
        url = base_url + tile
        urllib.urlretrieve(url, filename=filepath + tile)
# Driver: build the full zoom-8 tile list (256 x 256 tiles) and download it.
# NOTE(review): per the article's usage notes, run create_dirs first
# (uncomment the last line and comment out download_png) so the target
# directories exist before downloading.
url_list=create_url(8,8)
download_png(url_list,'d:/test/')
#create_dirs(url_list,'d:/test/')

用法

  • 先修改文件路径,分辨率等参数
  • 然后先注释掉download_png函数调用,先调用create_dirs函数创建目录,然后取消注释开始下载图片

采用多进程爬取,并处理网络带来的IOError

from math import *
import urllib
import urllib2
import requests
import os
from exceptions import IOError
import logging
import logging

# Log WARNING and above to myapp.log; filemode='w' overwrites the log on
# every run, so only the latest crawl's output is kept.
logging.basicConfig(level=logging.WARNING,
                format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
                datefmt='%a, %d %b %Y %H:%M:%S',
                filename='myapp.log',
                filemode='w')

url_list = []  # kept for backward compatibility; create_url now returns a fresh list
filepath = 'd:/test/'  # download destination root
base_url = 'http://a.tile.openstreetmap.org/'

def create_url(start, rate):
    """Return tile paths '<rate>/<y>/<z>.png' for y in [start, 2**rate).

    `start` lets different worker processes cover disjoint bands of
    rows at the same zoom level `rate`; z always ranges over the full
    [0, 2**rate).  Each generated path is logged at WARNING level so it
    lands in myapp.log.

    Bug fix: the original appended into the shared module-level
    `url_list`, so repeated calls accumulated stale entries.  A fresh
    local list is built and returned instead.
    """
    tiles = []
    dim = int(pow(2, rate))
    for y in range(start, dim):
        for z in range(dim):
            name = str(rate) + '/' + str(y) + '/' + str(z) + '.png'
            tiles.append(name)
            logging.warning(name)
    return tiles

def create_dirs(url_list, base_filepath):
    """Ensure a '<zoom>/<x>/' directory exists under base_filepath for
    each tile path (e.g. '10/630/5.png') in url_list.

    Newly created directories are logged at WARNING level.
    """
    for entry in url_list:
        pieces = entry.split('/')
        target = base_filepath + str(pieces[0]) + '/' + str(pieces[1]) + '/'
        if os.path.exists(target):
            continue  # already present — nothing to do
        logging.warning(target)
        os.makedirs(target)


def download_png(url_list, filepath):
    """Download each tile in url_list from base_url into filepath.

    On a network IOError the tile is retried once after a long pause;
    if the retry also fails the exception propagates.  Every attempted
    URL is printed and logged.

    Fixes: the Python-2-only `print url` statement is replaced with the
    cross-version `print(...)` call, and the `try` body is narrowed to
    just the call that can raise.
    """
    for tile in url_list:
        url = base_url + tile
        print(url)
        logging.warning(url)
        try:
            urllib.urlretrieve(url, filename=filepath + tile)
        except IOError as serr:
            logging.error(serr)
            # Back off for 3 minutes to let a transient network failure
            # clear, then retry the same tile once.
            time.sleep(180)
            urllib.urlretrieve(url, filename=filepath + tile)

import multiprocessing
import time

def _worker(start, rate):
    """Shared worker body: build the tile list for this process's band,
    create the target directories, then download the tiles."""
    urls = create_url(start, rate)
    create_dirs(urls, filepath)
    download_png(urls, filepath)


# Thin named wrappers so each process has a distinct target function.
# Fixes: the original had five identical copy-pasted bodies, and
# worker_5 was defined twice (the second definition silently shadowed
# the first).
def worker_1(start, rate):
    _worker(start, rate)


def worker_2(start, rate):
    _worker(start, rate)


def worker_3(start, rate):
    _worker(start, rate)


def worker_4(start, rate):
    _worker(start, rate)


def worker_5(start, rate):
    _worker(start, rate)

if __name__ == "__main__":
    p1 = multiprocessing.Process(target = worker_1, args = (630,10))
    p2 = multiprocessing.Process(target = worker_2, args = (700,10))
    p3 = multiprocessing.Process(target = worker_3, args = (800,10))
    p4 = multiprocessing.Process(target = worker_4, args = (900, 10))
    p5 = multiprocessing.Process(target = worker_5, args = (1000, 10))
   

    p1.start()
    p2.start()
    p3.start()
    p4.start()
    p5.start()

相关文章

网友评论

      本文标题:爬取不同分辨率下的不同地图图片数据

      本文链接:https://www.haomeiwen.com/subject/lcwnhxtx.html