Lanzou Cloud download link: https://wws.lanzous.com/iRmnqdpadri
If you have feature requests, please leave a comment under this post; updates will no longer be synced to 52pojie.
2020-06-15 18:18
Added wallpaper selection for the following phone models:
Samsung Note10
HuaWei Mate 20 Pro
HuaWei P30
iPhone Xs Max

2020-06-15 17:10
Fixed a number of issues and added multithreading (which I think turned out quite well); download speed should be at least 20x faster in theory, and in practice it feels roughly 20x faster too.
Added automatic detection of the maximum page number, so the program no longer stops after downloading a single page: instead of crawling only the first page, it now crawls every page of wallpapers in the chosen category (i.e. once the user picks a category, it downloads all wallpapers up to that category's last page). The thread-pool pattern behind the speed-up is sketched below.
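For reference, here is a minimal sketch of the thread-pool pattern the script uses (ThreadPoolExecutor plus wait with ALL_COMPLETED); download_page and the example URLs are placeholders for illustration, not part of the real script:

from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED

def download_page(page_url):
    # placeholder task: stands in for fetching and saving one listing page
    print('downloading', page_url)

pages = [f'https://example.com/wallpapers?page={i}' for i in range(1, 4)]  # hypothetical page URLs
ex = ThreadPoolExecutor(max_workers=20)               # 20 worker threads, same as the full script
futures = [ex.submit(download_page, p) for p in pages]
wait(futures, return_when=ALL_COMPLETED)              # block until every page has been processed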
(Demo GIFs of the program running: 1.gif, 2.gif)
The code is fairly messy. I had wanted to write this for a long time, but I could not work out the logic, so it kept getting shelved; today it suddenly clicked, so I threw this together in a hurry. Phone model selection, wallpaper category selection, and code clean-up will come in later updates.
For now it only crawls the first page of wallpapers; to be updated...

#coding:utf-8
import os, requests, time, re
from lxml import etree
from requests.adapters import HTTPAdapter
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED

os.system('title iPhone 11 Wallpapers @小伍的游乐场-5yang.cc')  # set the console window title (Windows only)

path = '壁纸'  # output folder for the downloaded wallpapers
if not os.path.exists(path):
    os.makedirs(path)

def toplist(url):  # fetch a page's HTML with retries
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'}
    s = requests.session()  # reuse one session (connection keep-alive)
    s.mount('http://', HTTPAdapter(max_retries=10))  # up to 10 adapter-level retries for both http and https
    s.mount('https://', HTTPAdapter(max_retries=10))
    print(time.strftime('%Y-%m-%d %H:%M:%S'))
    x = 0
    while x < 10:  # up to 10 manual retries on top of the adapter-level ones
        try:
            r = s.get(url, headers=headers, timeout=(20, 20))
            if r.status_code == 200:
                print('Page fetched, returning its HTML')
                #print(r.text)
                return r.text
            x += 1  # non-200 response: count it as a failed attempt instead of looping forever
        except requests.exceptions.RequestException as e:  # catch request errors explicitly; without this the traceback points somewhere unhelpful (e.g. inside page_num())
            print(e)
            x += 1
            print('Retrying...')
            print(time.strftime('%Y-%m-%d %H:%M:%S'))

def fenxi(html):  # parse a listing page and return the detail-page link of every wallpaper on it
    html = etree.HTML(html)
    result = html.xpath('//div[@class="thumb-element"]/a/@href')
    return result

def page_num(html):  # find the maximum page number on the first listing page
    zhengze = re.compile('<input type="text" class="form-control" placeholder="Page # / (.*?)">', re.S)
    first_pic_url = re.findall(zhengze, html)
    print('Maximum page count:', first_pic_url[0])
    return first_pic_url[0]

def down_pic(picurl):  # download the full-size image behind each detail-page link
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'}
    for x in picurl:
        try:
            r = requests.get(x, headers=headers, timeout=(20, 20)).text
            html = etree.HTML(r)
            result = html.xpath('//*[@id="page_container"]/div[2]/img/@src')   # full-size image URL
            picname = html.xpath('//*[@id="page_container"]/div[2]/img/@alt')  # image title, used as the file name
            print(result[0])
            jpgget = requests.get(result[0], timeout=(20, 20))
        except requests.exceptions.RequestException as e:
            print('Download timed out!', e)
            continue  # skip this image; otherwise jpgget/picname would be undefined below
        try:
            with open(f'{path}/{picname[0]}.jpg', 'wb') as f:  # picname is a list, take its first element
                f.write(jpgget.content)
                print(f'Finished downloading {picname[0]}')
        except OSError as e:
            print('Failed to save the file!', e)

if __name__ == "__main__":
    while True:
        urls = []  # reset the URL queue on every run so earlier pages are not downloaded again
        w = int(input('Which phone wallpapers do you want to download?\n1: iPhone 11\nEnter a number: '))
        if w == 1:
            url = 'https://mobile.alphacoders.com/by-device/540/iPhone-11-Wallpapers?page='
        else:
            print('Please enter a valid number')
            continue  # without this, url would be undefined below
        html = toplist(url)
        pagemax = page_num(html)
        for i in range(1, int(pagemax) + 1):  # listing pages are numbered from 1
            urls.append(url + str(i))
        print(urls)
        print('Starting to download the queued pages!')
        ex = ThreadPoolExecutor(max_workers=20)  # a pool of 20 worker threads
        # ex.submit takes the function itself (down_pic, no parentheses) plus its argument;
        # each listing page is fetched and parsed in the main thread, and its images are downloaded in a worker thread.
        future = [ex.submit(down_pic, fenxi(toplist(url))) for url in urls]
        wait(future, return_when=ALL_COMPLETED)  # block until every submitted task has finished
        print('=' * 50)
        print('All wallpapers downloaded!')
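As a side note, the retry set-up in toplist() layers a manual retry loop on top of requests' adapter-level retries. The following is a minimal standalone sketch of that pattern only; fetch_with_retries and the example URL are hypothetical names for illustration, not part of the script above:

import requests
from requests.adapters import HTTPAdapter

def fetch_with_retries(url, attempts=10):
    s = requests.Session()
    s.mount('http://', HTTPAdapter(max_retries=10))   # transport-level retries (connection errors)
    s.mount('https://', HTTPAdapter(max_retries=10))
    for _ in range(attempts):                         # manual retries on top of the adapter's
        try:
            r = s.get(url, timeout=(20, 20))
            if r.status_code == 200:
                return r.text
        except requests.exceptions.RequestException as e:
            print('Retrying after error:', e)
    return None

# html = fetch_with_retries('https://example.com')  # hypothetical usage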


Last modified: December 5, 2022