文章

战神狼胥 小说 完整爬虫(出错自动重试)

from time import sleep
from urllib.parse import urljoin

import requests
from lxml import html

class zhanshen:
    """Scraper that walks a novel's pages on www.xbiquge.la one at a time.

    Each call to :meth:`save` downloads the page at :attr:`url`, appends its
    title and text to :attr:`out_file`, and moves :attr:`url` on to the page
    referenced by the "next page" link.  When a download fails, or the next
    link is missing, :attr:`url` is left untouched so that simply calling
    :meth:`save` again retries the same page.
    """

    session = requests.session()
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
    }
    # Install the browser-like User-Agent on the shared session.
    session.headers.update(header)
    url = 'https://www.xbiquge.la/54/54376/27922265.html'  # starts at chapter 858
    word = ''   # text nodes of the most recently parsed page
    title = ''  # <title> text of the most recently parsed page

    # File that every page is appended to.
    out_file = '战神狼胥858.txt'
    # Seconds to wait for the server before a request is treated as failed.
    timeout = 30

    # URL of the page currently held in `word` / `title`.
    _fetched_url = None
    # URL of the last page written to `out_file`; used to avoid writing the
    # same page again while waiting for its next link to appear.
    _saved_url = None

    def req(self):
        """Download and parse the page at ``self.url``.

        On success ``self.word`` and ``self.title`` are replaced with the
        page's content and title, and :meth:`next_page` is asked to advance
        ``self.url``.  On failure nothing is modified.

        Returns:
            bool: ``True`` if the page was fetched and parsed, ``False`` if
            the request failed, the body could not be decoded as UTF-8, the
            body was empty, or the page had no ``<title>``.
        """
        try:
            r = self.session.get(url=self.url, headers=self.header,
                                 timeout=self.timeout)
            r.raise_for_status()
            text = r.content.decode('utf-8')
        except (requests.RequestException, UnicodeDecodeError) as exc:
            print('Request failed, will retry:', exc)
            return False

        # lxml refuses to parse an empty document; treat it as a failed fetch.
        if not text.strip():
            print('Empty response, will retry:', self.url)
            return False

        tree = html.fromstring(text)
        titles = tree.xpath('//title/text()')
        if not titles:
            print('Title not found, will retry:', self.url)
            return False

        self.word = tree.xpath('//*[@id="content"]/text()')
        self.title = titles[0]
        # Remember which page the parsed data belongs to before next_page()
        # moves self.url forward.
        self._fetched_url = self.url
        self.next_page(tree)
        return True

    def next_page(self, tree):
        """Point ``self.url`` at the page linked as "next" in *tree*.

        Args:
            tree: parsed lxml HTML element of the current page.

        Returns:
            bool: ``True`` if a next link was found and ``self.url`` was
            updated, ``False`` if the link is missing (``self.url`` is then
            left unchanged so the same page is requested again).
        """
        nextpage = tree.xpath('//*[@id="wrapper"]/div/div/div/a[4]//@href')
        if not nextpage:
            print('下一页未找到:', len(nextpage))
            return False
        print(nextpage)
        # Resolve against the current page so root-relative, page-relative
        # and absolute hrefs all produce a well-formed URL.
        self.url = urljoin(self.url, nextpage[0])
        print(self.url)
        return True

    def save(self):
        """Fetch the current page and append it to ``self.out_file``.

        Nothing is written when the fetch fails or when the fetched page is
        the one that was written last time.

        Returns:
            bool: ``True`` if a page was written, ``False`` otherwise.
        """
        if not self.req():
            return False
        if self._fetched_url == self._saved_url:
            print('Already saved, waiting for next page:', self._fetched_url)
            return False

        with open(self.out_file, 'a', encoding='utf-8') as f:
            f.write(self.title + '\r\n')
            f.writelines(self.word)
        self._saved_url = self._fetched_url
        print(self.title + '保存完毕')
        print('Save Finish!')
        return True





if __name__ == '__main__':
    # Keep the name `obj`: methods of zhanshen may look the instance up
    # through this module-level name.
    obj = zhanshen()
    try:
        while True:
            try:
                obj.save()
                print('done!')
            except Exception as exc:
                # Top-level boundary: report the error and fall through to the
                # sleep so the next iteration tries again instead of the whole
                # scraper dying on one bad page or network hiccup.
                print('Error, will retry:', exc)
            # Pause between requests.
            sleep(3)
    except KeyboardInterrupt:
        # Ctrl-C is the only way out of the loop; exit without a traceback.
        pass

    print('fall!')

原文来自:战神狼胥 小说 完整爬虫(出错自动重试),尊重自己,尊重每一个人;转发请注明来源!
0 0

发表评论