某市人才网系统爬虫源码(可自动投简历)
这套骑士CMS很早以前就见过了,自己还搭过很多次,以前也帮别人维护过.
时隔很多年再回首看看,PHPWIND和DZ都已被收购,下场好像都挺惨.
该源码只要改成多线程运行,几乎就相当于发起一次CC攻击

import requests,re
from lxml import etree

class XYrc:
    """Crawl a job board's listing pages and submit a resume per company.

    Workflow:
      * ``GetHome`` logs in once, walks the job-list pages and collects the
        de-duplicated company names and company page URLs.
      * ``GetCompanyJobs`` opens every company page and hands the first
        listed open position to ``GetJobMessage``.
      * ``GetJobMessage`` submits a resume when the job page shows a
        '点击查看' button (presumably meaning the contact details stay hidden
        until a resume is sent -- confirm against the site), otherwise it
        prints the contact details that are already on the page.
      * ``AfterSend`` counts the submission and prints the contact details
        found on the re-fetched job page.

    All requests go through one ``requests`` session so the login cookies
    are shared by every later call.
    """

    # Upper bound compared against ``self.sum`` before a resume is submitted;
    # adjust as needed.
    MAX_APPLY = 51

    def __init__(self):
        self.url = 'https://www.****.com/index.php?m=&c=members&a=login'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest',
            'Origin': 'https://www..****..com',
            'Referer': 'https://www..****..com/index.php?m=&c=members&a=login',
            'Host': 'www..****..com'
        }
        self.LoginData = {
            'username': '.****.',
            'password': '.****.'
        }

        self.s = requests.session()
        self.page = 50
        self.sum = 0  # number of resumes actually submitted
        self.New_Companys = []
        self.New_CompanyUrls = []

    @staticmethod
    def _ParseJobId(JobsUrl):
        """Return the first run of digits in *JobsUrl*, or None if there is none."""
        match = re.search(r'\d+', str(JobsUrl))
        return match.group() if match else None

    @staticmethod
    def _ParseContact(Text):
        """Return every name that follows '联系人:' in *Text* (up to whitespace)."""
        return re.findall(r'''联系人:(.*?)\s''', str(Text))

    @staticmethod
    def _AppendUnique(Target, Items):
        """Append the entries of *Items* not yet in *Target*; return *Target*."""
        for item in Items:
            if item not in Target:
                Target.append(item)
        return Target

    def _PrintContact(self, html):
        """Print company name, phone numbers and contact person of a job page."""
        tel = html.xpath('''//span[@class='tel']/text()''')  # phone number
        Nodes = html.xpath('''//div[@class='contact']/div/text()''')  # contact block
        # The contact person is read from the second text node; a page without
        # it yields an empty list instead of raising IndexError.
        Person = self._ParseContact(Nodes[1]) if len(Nodes) > 1 else []
        Company = html.xpath('''//a[@class='line_substring']/text()''')  # company name
        print(Company, tel, Person)

    def GetHome(self):
        """Log in, collect companies from the job-list pages, then process them.

        Visits pages ``1 .. self.page - 1`` of the job list, accumulating
        unique company names in ``self.New_Companys`` and unique company URLs
        in ``self.New_CompanyUrls``, and finally calls ``GetCompanyJobs``.
        """
        # One login is enough: the session keeps the cookies for later requests.
        self.s.post(url=self.url, data=self.LoginData, allow_redirects=True, headers=self.headers)

        for page in range(1, self.page):
            r = self.s.get(f'https://www..****..com/index.php?m=&c=jobs&a=jobs_list&page={page}')
            html = etree.HTML(r.text)

            self._AppendUnique(self.New_Companys, html.xpath('''//div[@class='td3 link_gray6']/a/text()'''))
            print('公司名称去重完毕')

            self._AppendUnique(self.New_CompanyUrls, html.xpath('''//div[@class='td3 link_gray6']/a/@href'''))
            print('公司网址去重完毕')

            # Sanity check only: both lists are de-duplicated independently,
            # so a mismatch just signals that manual inspection is needed.
            if len(self.New_Companys) == len(self.New_CompanyUrls):
                print("主页公司数据相等!!!")
            else:
                print("数据不相等,请手动查阅………………")

        self.GetCompanyJobs()

    def GetCompanyJobs(self):
        """Open each collected company page and process its first open position."""
        for CompanyUrl in self.New_CompanyUrls:
            r = self.s.get(CompanyUrl)
            html = etree.HTML(r.text)
            JobsUrl = html.xpath('''//div[@class='jobs']//div[@class='ljob']/a/@href''')  # open position URLs
            if JobsUrl:
                self.GetJobMessage(JobsUrl[0])
            else:
                print('未找到在招职位', JobsUrl)

    def GetJobMessage(self, JobsUrl):
        """Submit a resume for the job at *JobsUrl* if needed and print its contact.

        When the page has no 'appbtn J_check_truenum' button the contact
        details are printed straight from the page and nothing is submitted.
        When the button reads '点击查看' and fewer than ``MAX_APPLY`` resumes
        have been sent, the resume is submitted and ``AfterSend`` reports the
        contact details.
        """
        r = self.s.get(JobsUrl)
        html = etree.HTML(r.text)

        ClickView = html.xpath('''//div[@class='appbtn J_check_truenum']/text()''')  # "click to view" button
        if not ClickView:
            print('无需投递简历')
            # Nothing was submitted here, so self.sum is left untouched.
            self._PrintContact(html)
            return

        if str(ClickView[0]) != '点击查看' or self.sum >= self.MAX_APPLY:
            return

        jid = self._ParseJobId(JobsUrl)
        if jid is None:
            print('未能解析职位编号', JobsUrl)
            return

        SendUrl = f'https://www..****..com/index.php?m=&c=ajax_personal&a=resume_apply&jid={jid}'  # resume submission endpoint
        try:
            self.s.get(SendUrl)
            print('投递简历')
            self.AfterSend(JobsUrl)
        except requests.RequestException as exc:
            # A failed request for one job must not abort the whole crawl.
            print('投递失败', JobsUrl, exc)

    def AfterSend(self, JobsUrl):
        """Count one submission, then re-fetch *JobsUrl* and print its contact."""
        # Count first so a failure while reading the page cannot lose the
        # submission that has already been made.
        self.sum += 1
        r = self.s.get(JobsUrl)
        html = etree.HTML(r.text)
        self._PrintContact(html)
# Script entry point: build the crawler and start from the job-list pages.
if __name__ =="__main__":
    # NOTE(review): the instance is bound to the module-level name `xinyurc`;
    # keep this name stable, since other code in this file may look it up
    # as a global.
    xinyurc = XYrc()
    xinyurc.GetHome()

发表回复

登录后才能评论