蓝奏云下载地址:https://wws.lanzous.com/iRmnqdpadri
有需求请在本贴留言,不再同步52pojie
2020-6-15-18:18
更新以下几款手机壁纸选择:
Samsung Note10
HuaWei Mate 20 Pro
HuaWei P30
iphone Xs Max
2020-6-15-17:10
更新了一些问题,并自我认为完美地加入多线程运作,此时下载速度起码提升了20倍(理论值),实践自我感觉也提升了20倍。
加入了程序自动寻找最大页数功能,不会下载一页就停止,目前不止爬取第一页壁纸了,而是爬取整站壁纸(即是用户选择好分类后直接下载该分类下最大页数的所有壁纸)
代码比较乱,早就想写了,之前没找到逻辑所以一直搁置在这,今天忽然一下找到了,于是匆忙写一个,之后会更新手机型号选择/壁纸分类选择/代码优化。
目前只能爬取第一页壁纸,待更新……
#coding:utf-8
import os,lxml,requests,sys,time,re
from lxml import etree
from requests.adapters import HTTPAdapter
from concurrent.futures import ThreadPoolExecutor,wait,ALL_COMPLETED
# Set the console window title. `title` is a cmd.exe built-in, so the call is
# only made on Windows; elsewhere it would just produce a "command not found"
# message from the shell.
if os.name == 'nt':
    os.system('title Iphone11壁纸 @小伍的游乐场-5yang.cc')

# Directory (relative to the current working directory) that downloaded
# wallpapers are written to. `exist_ok=True` makes the creation idempotent
# and avoids the check-then-create race of a separate `os.path.exists` test.
path = '壁纸'
os.makedirs(path, exist_ok=True)
def toplist(url):
    """Download *url* and return the response body as text.

    The request goes through a ``requests`` session whose http/https adapters
    are mounted with ``max_retries=10``; on top of that the whole request is
    attempted up to 10 times by this function.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response text of the first attempt answered with HTTP 200, or
        ``None`` when every attempt failed.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'}
    max_attempts = 10
    # The session is used as a context manager so its pooled connections are
    # released on every exit path.
    with requests.session() as s:
        s.mount('http://', HTTPAdapter(max_retries=10))
        s.mount('https://', HTTPAdapter(max_retries=10))
        print(time.strftime('%Y-%m-%d %H:%M:%S'))
        for _ in range(max_attempts):
            try:
                r = s.get(url, headers=headers, timeout=(20, 20))
            except requests.exceptions.RequestException as e:
                print(e)
            else:
                if r.status_code == 200:
                    print('该地址已经return')
                    return r.text
                # A non-200 answer is a failed attempt too. Previously only
                # exceptions advanced the retry counter, so a persistent
                # 4xx/5xx response made this loop spin forever.
                print(f'HTTP {r.status_code}: {url}')
            print('开始重试.')
            print(time.strftime('%Y-%m-%d %H:%M:%S'))
    # All attempts exhausted; callers must be prepared for None.
    return None
def fenxi(html):
    """Return the link of every thumbnail found on a listing page.

    Args:
        html: Markup of a listing page as text. ``None`` or an empty string
            (e.g. the result of a failed download) is accepted.

    Returns:
        A list with the ``href`` of each ``<a>`` directly inside a
        ``<div class="thumb-element">``; an empty list when there is nothing
        to parse.
    """
    if not html:
        return []
    tree = etree.HTML(html)
    # The parser can yield no tree for markup without any element; treat
    # that the same as "no thumbnails" instead of failing on `.xpath`.
    if tree is None:
        return []
    return tree.xpath('//div[@class="thumb-element"]/a/@href')
def page_num(html):
    """Extract the highest page number from a listing page.

    The number is read from the page-jump input, whose placeholder has the
    form ``Page # / <max>``.

    Args:
        html: Markup of a listing page as text; ``None`` or an empty string
            is accepted.

    Returns:
        The captured maximum page number as a string (callers convert it
        with ``int``). When the input is missing or the placeholder is not
        found, ``'1'`` is returned.
    """
    pattern = re.compile(r'<input type="text" class="form-control" placeholder="Page # / (.*?)">', re.S)
    matches = pattern.findall(html) if html else []
    if not matches:
        # NOTE(review): assumes a listing without the page-jump input has a
        # single page - confirm against the site. This replaces an IndexError
        # (or a TypeError for None) that aborted the whole run.
        print('未找到最大页数,按1页处理')
        return '1'
    print('最大页数:', matches[0])
    return matches[0]
def down_pic(picurl):
    """Download the image shown on each detail page and save it under ``path``.

    For every link the detail page is fetched, the ``src`` and ``alt`` of the
    image at ``//*[@id="page_container"]/div[2]/img`` are read, the image is
    downloaded and written to ``<path>/<alt>.jpg``. A link that fails at any
    step is reported and skipped; the remaining links are still processed.

    Args:
        picurl: Iterable of detail-page URLs.

    Returns:
        None.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'}
    for page_url in picurl:
        try:
            # The headers were built but never sent before; pass them along.
            page = requests.get(page_url, headers=headers, timeout=(20, 20))
            page.raise_for_status()
            tree = etree.HTML(page.text)
            img_src = tree.xpath('//*[@id="page_container"]/div[2]/img/@src') if tree is not None else []
            img_alt = tree.xpath('//*[@id="page_container"]/div[2]/img/@alt') if tree is not None else []
            if not img_src:
                # Indexing an empty result used to raise IndexError and end
                # the whole worker task.
                print('未找到图片地址:', page_url)
                continue
            print(img_src[0])
            image = requests.get(img_src[0], headers=headers, timeout=(20, 20))
            # Do not save an HTTP error page as if it were a picture.
            image.raise_for_status()
        except requests.exceptions.RequestException as e:
            print('下载超时!', e)
            # Skip this link; falling through would write the previous
            # iteration's image (or hit an undefined name on the first one).
            continue

        # xpath returns a list: use its first entry rather than the list's
        # repr as the file name, falling back to the URL's base name.
        raw_name = img_alt[0].strip() if img_alt else ''
        if not raw_name:
            raw_name = os.path.splitext(os.path.basename(img_src[0].split('?')[0]))[0]
        # Replace characters that are not allowed in file names.
        name = re.sub(r'[\\/:*?"<>|\r\n\t]+', '_', raw_name).strip() or 'unnamed'

        try:
            with open(f'{path}/{name}.jpg', 'wb') as f:
                f.write(image.content)
        except OSError as e:
            # A write failure is not a timeout; report what actually happened.
            print(f'保存{name}失败!', e)
            continue
        print(f'已完成{name}的下载')
def _ask_base_url():
    """Prompt until a listed device is chosen and return its listing URL prefix."""
    while True:
        try:
            choice = int(input('请问你要下载哪一个手机壁纸:\n1:iphone11\n请输入:'))
        except ValueError:
            # Non-numeric input used to abort the program with a traceback.
            choice = None
        if choice == 1:
            return 'https://mobile.alphacoders.com/by-device/540/iPhone-11-Wallpapers?page='
        print('请正确输入数字')


def _download_page(page_url):
    """Fetch one listing page and download every wallpaper linked from it."""
    html = toplist(page_url)
    if not html:
        print('页面获取失败,已跳过:', page_url)
        return
    down_pic(fenxi(html))


if __name__ == "__main__":
    # NOTE(review): the original indentation was lost; this assumes the whole
    # prompt-and-download flow repeats inside the `while True` loop - confirm.
    while True:
        url = _ask_base_url()
        html = toplist(url)
        try:
            pagemax = int(page_num(html))
        except (IndexError, TypeError, ValueError):
            print('无法获取最大页数,请稍后重试')
            continue
        # Rebuilt on every round so links from an earlier round are not
        # downloaded again. Starts at 1 instead of 0.
        # NOTE(review): assumes the site numbers pages from 1, as the
        # "Page # / N" placeholder suggests - confirm.
        urls = [url + str(i) for i in range(1, pagemax + 1)]
        print(urls)
        print('开始下载新一页内容!')
        # Each task fetches and parses its listing page itself. Previously
        # toplist/fenxi ran one after another in the main thread while the
        # futures list was being built, so only the image downloads were
        # handed to the pool.
        with ThreadPoolExecutor(max_workers=20) as ex:
            futures = [ex.submit(_download_page, page_url) for page_url in urls]
            wait(futures, return_when=ALL_COMPLETED)
        # Report exceptions raised inside workers instead of dropping them.
        for page_url, fut in zip(urls, futures):
            err = fut.exception()
            if err is not None:
                print('页面处理失败:', page_url, err)
        print('=' * 50)
        print('所有图片下载完成!')