文章

小红书App爬虫测试

  • 如果不明白其中的原理,直接拿源码去运行也是无法正常抓取内容的!

111.gif

import requests,json,re
# url = 'http://www.xiaohongshu.com/api/sns/v10/search/notes?keyword=467226345&filters=%5B%5D&sort=popularity_descending&page=1&page_size=20&source=search_result_notes&search_id=F44A92CD5748E8DB2EAAF95728375179%40FD8C756CE9C512E347F8ECEEA1BFFE95&api_extra=&page_pos=0&allow_rewrite=1&geo=eyJsYXRpdHVkZSI6MzUuMDAwMjEwLCJsb25naXR1ZGUiOjEwNS41NjcyODR9%0A&word_request_id=&platform=android&deviceId=a93f4824-1122-3007-892c-31429772e8aa&device_fingerprint=202008071857502ca73c3344dab06650c5ac61c0d7687c01048878c7035da6&device_fingerprint1=202008071857502ca73c3344dab06650c5ac61c0d7687c01048878c7035da6&versionName=6.35.0.1&channel=BaiduButton&sid=session.1596798498489757957443&lang=zh-Hans&t=1596802465&fid=159679786710108c7477d5f5084499f6156cfb86675a&sign=d25af8fbc93376cee2aeaab6cd51e8af'

# Search endpoint replayed by this script. The query string carries the search
# keyword, device/session identifiers and a `sign` value, all hard-coded.
# NOTE(review): these look like values captured from one live app session and
# presumably stop working once that session expires - confirm before relying
# on them.
url = 'https://www.xiaohongshu.com/api/sns/v10/search/notes?keyword=%E7%BE%8E%E5%A5%B3&filters=%5B%5D&sort=&page=1&page_size=20&source=explore_feed&search_id=DE6D54BFE3B1374D5046E1BDFE5CC152&api_extra=&page_pos=0&allow_rewrite=1&geo=eyJsYXRpdHVkZSI6MzUuMDAwMjEwLCJsb25naXR1ZGUiOjEwNS41NjcyODR9%0A&word_request_id=&platform=android&deviceId=a93f4824-1122-3007-892c-31429772e8aa&device_fingerprint=202008071857502ca73c3344dab06650c5ac61c0d7687c01048878c7035da6&device_fingerprint1=202008071857502ca73c3344dab06650c5ac61c0d7687c01048878c7035da6&versionName=6.35.0.1&channel=BaiduButton&sid=session.1596798498489757957443&lang=zh-Hans&t=1596805370&fid=159679786710108c7477d5f5084499f6156cfb86675a&sign=5ab73c18dad603f287ffebbba1ec919a'

# Headers sent with the search request and with every media download.
# NOTE(review): this means `Host` and the session tokens are also sent to
# whatever host serves the media files - confirm that is intended.
headers = {
    'User-Agent': 'Dalvik/2.1.0 (Linux; U; Android 5.1.1; VOG-AL10 Build/HUAWEIVOG-AL10) Resolution/1080*1920 Version/6.35.0.1 Build/6350101 Device/(huawei;VOG-AL10) discover/6.35.0.1 NetType/WiFi)',
    'xy-common-params': 'platform=android&deviceId=a93f4824-1122-3007-892c-31429772e8aa&device_fingerprint=202008071857502ca73c3344dab06650c5ac61c0d7687c01048878c7035da6&device_fingerprint1=202008071857502ca73c3344dab06650c5ac61c0d7687c01048878c7035da6&versionName=6.35.0.1&channel=BaiduButton&sid=session.1596798498489757957443&t=1596805370&fid=159679786710108c7477d5f5084499f6156cfb86675a&uis=light&identifier_flag=2',
    'shield': '7407dde64d220694b98245cb6d004008',
    'xy-platform-info': 'platform=android&build=6350101&deviceId=a93f4824-1122-3007-892c-31429772e8aa',
    'Host': 'www.xiaohongshu.com',
    'Connection': 'Keep-Alive',
    'Accept-Encoding': 'gzip',
}

# Seconds to wait on any single HTTP request; without a timeout a stalled
# server would block the script forever.
REQUEST_TIMEOUT = 30

# The patterns are applied to the JSON response after it has been re-serialised
# with indent=2, which is what guarantees the `"key": "value"` spacing they
# rely on. re.S lets `.*?` cross the line breaks of the pretty-printed text.
COVER_PATTERN = re.compile(r'"url_size_large": "(.*?)"', re.S)
VIDEO_PATTERN = re.compile(r'"video_info":.*?"url": "(.*?)"', re.S)
GIF_PATTERN = re.compile(r'"video_info":.*?"gif_url": "(.*?)"', re.S)


def find_media_urls(text):
    """Extract media URLs from the pretty-printed JSON text of a search reply.

    Args:
        text: JSON text as produced by ``json.dumps(..., indent=2)``.

    Returns:
        A ``(covers, videos, gifs)`` tuple of URL lists, each in order of
        appearance. Any of the lists may be empty.
    """
    return (
        COVER_PATTERN.findall(text),
        VIDEO_PATTERN.findall(text),
        GIF_PATTERN.findall(text),
    )


def pair_media(covers, videos):
    """Pair every video URL with the cover URL found at the same position.

    The two lists come from independent regex scans, so they are not
    guaranteed to have the same length. A video without a cover at its
    position is paired with ``None`` instead of raising ``IndexError``;
    covers beyond the last video are ignored.

    Returns:
        A list of ``(index, cover_or_None, video)`` tuples.
    """
    return [
        (index, covers[index] if index < len(covers) else None, video)
        for index, video in enumerate(videos)
    ]


def download(link, path):
    """Fetch ``link`` and write the response body to ``path``.

    Returns:
        True when the file was written, False when the request failed (the
        failure is reported on stdout and nothing is written).
    """
    try:
        response = requests.get(link, headers=headers, timeout=REQUEST_TIMEOUT)
        # Reject HTTP error replies so an error page is never saved as media.
        response.raise_for_status()
    except requests.RequestException as error:
        print(f'下载失败: {link} ({error})')
        return False
    with open(path, 'wb') as f:
        f.write(response.content)
    return True


def main():
    """Run the search request and download each video with its cover image.

    Files are written to the current directory as ``<index>.webp`` and
    ``<index>.mp4``.
    """
    # NOTE(review): verify=False disables TLS certificate checking for the
    # search request. It is kept because the original script used it
    # (presumably to run behind a traffic-capturing proxy - confirm); remove
    # it when that is not required.
    response = requests.get(
        url, headers=headers, verify=False, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()

    text = json.dumps(response.json(), indent=2, ensure_ascii=False)
    # GIF URLs are extracted as well but are not downloaded.
    covers, videos, _gifs = find_media_urls(text)

    for index, cover, video in pair_media(covers, videos):
        # Success messages are printed only after the file has been closed.
        if cover is not None and download(cover, f'./{index}.webp'):
            print('图片下载完成,需要使用chrome浏览器打开')
        if download(video, f'./{index}.mp4'):
            print('视频下载完成')
    input('按任意键退出')


if __name__ == "__main__":
    main()

原文来自:小红书App爬虫测试,尊重自己,尊重每一个人;转发请注明来源!
0 0

发表评论