这套骑士CMS很早以前就见过了,自己还搭建过很多次,以前也帮别人维护过。
时隔多年再回头看,PHPWIND和DZ(Discuz)都已被收购,下场好像都挺惨。
如果把这份源码改成多线程,几乎就等同于发起一次CC攻击。
import requests,re
from lxml import etree
class XYrc:
    """Scraper that logs in to a recruitment site, lists companies and applies to jobs.

    Flow:
      * ``GetHome`` logs in, walks the job-list pages and collects the company
        names and company page URLs (duplicates removed, order preserved).
      * ``GetCompanyJobs`` opens every collected company page and passes the
        first listed job URL to ``GetJobMessage``.
      * ``GetJobMessage`` submits the resume when the job page shows the
        '点击查看' ("click to view") button, then prints company name, phone
        number and contact person.

    Attributes:
        sum: number of resumes submitted so far.
        max_sum: no further resume is submitted once ``sum`` reaches this value.
        page: exclusive upper bound of the list pages to crawl.
        New_Companys / New_CompanyUrls: de-duplicated company names / URLs.
    """

    # Upper limit for ``sum``; kept at class level so it can be overridden per instance.
    max_sum = 51

    # The first run of digits found in a job URL is used as the job id (``jid``).
    # NOTE(review): this assumes the host name itself contains no digits - confirm.
    _JOB_ID_RE = re.compile(r'\d+')
    # Captures the text between the "联系人:" label and the next whitespace character.
    _CONTACT_RE = re.compile(r'''联系人:(.*?)\s''')

    def __init__(self):
        """Set up the login endpoint, headers, credentials, HTTP session and counters."""
        self.url = 'https://www.****.com/index.php?m=&c=members&a=login'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36',
            'X-Requested-With': 'XMLHttpRequest',
            'Origin': 'https://www..****..com',
            'Referer': 'https://www..****..com/index.php?m=&c=members&a=login',
            'Host': 'www..****..com'
        }
        self.LoginData = {
            'username': '.****.',
            'password': '.****.'
        }
        self.s = requests.session()
        self.page = 50  # range(1, page) is used, so pages 1 .. page-1 are crawled
        self.sum = 0  # number of resumes submitted
        self.New_Companys = []
        self.New_CompanyUrls = []

    @staticmethod
    def _extend_unique(target, items):
        """Append every item of *items* not yet in *target*, keeping first-seen order."""
        seen = set(target)
        for item in items:
            if item not in seen:
                seen.add(item)
                target.append(item)

    @classmethod
    def _job_id(cls, url):
        """Return the first run of digits in *url* as a string, or None if there is none."""
        match = cls._JOB_ID_RE.search(str(url))
        return match.group(0) if match else None

    @classmethod
    def _parse_contact(cls, texts):
        """Return the contact names found in the second entry of *texts*.

        *texts* is the list of text nodes of the contact box; the contact
        person is read from index 1. An empty list is returned when that
        entry is missing, instead of raising IndexError.
        """
        if len(texts) < 2:
            return []
        return cls._CONTACT_RE.findall(str(texts[1]))

    def _print_contact(self, html):
        """Print company name, phone number(s) and contact person(s) of a parsed job page."""
        tel = html.xpath('''//span[@class='tel']/text()''')  # phone number
        person = self._parse_contact(html.xpath('''//div[@class='contact']/div/text()'''))  # contact person
        company = html.xpath('''//a[@class='line_substring']/text()''')  # company name
        print(company, tel, person)

    def GetHome(self):
        """Log in, collect companies from the job-list pages, then process each company."""
        # Log in once up front; the session object carries the cookies to later requests,
        # so there is no need to repeat the login for every list page.
        self.s.post(url=self.url, data=self.LoginData, allow_redirects=True, headers=self.headers)
        for page in range(1, self.page):
            r = self.s.get(f'https://www..****..com/index.php?m=&c=jobs&a=jobs_list&page={page}')
            html = etree.HTML(r.text)
            self._extend_unique(self.New_Companys, html.xpath('''//div[@class='td3 link_gray6']/a/text()'''))
            print('公司名称去重完毕')
            self._extend_unique(self.New_CompanyUrls, html.xpath('''//div[@class='td3 link_gray6']/a/@href'''))
            print('公司网址去重完毕')
        # Names and URLs are de-duplicated independently, so a length mismatch
        # signals that the two lists no longer line up.
        if len(self.New_Companys) == len(self.New_CompanyUrls):
            print("主页公司数据相等!!!")
        else:
            print("数据不相等,请手动查阅………………")
        self.GetCompanyJobs()

    def GetCompanyJobs(self):
        """Open every collected company page and process its first listed job."""
        for company_url in self.New_CompanyUrls:
            r = self.s.get(company_url)
            html = etree.HTML(r.text)
            JobsUrl = html.xpath('''//div[@class='jobs']//div[@class='ljob']/a/@href''')  # URLs of open positions
            if JobsUrl:
                # Only the first open position of each company is handled.
                self.GetJobMessage(JobsUrl[0])
            else:
                print('未找到在招职位', JobsUrl)

    def GetJobMessage(self, JobsUrl):
        """Fetch a job page, submit the resume if required and allowed, and print its contact data.

        Args:
            JobsUrl: URL of the job detail page; its first run of digits is
                used as the ``jid`` parameter of the apply request.

        When the resume is submitted, the page is fetched again by
        ``AfterSend`` and the contact data is printed from that fresh copy.
        Otherwise the contact data is printed from the page already fetched.
        """
        r = self.s.get(JobsUrl)
        html = etree.HTML(r.text)
        ClickView = html.xpath('''//div[@class='appbtn J_check_truenum']/text()''')  # "click to view" button
        if not ClickView:
            # No button on the page: nothing to submit.
            print('无需投递简历')
        elif str(ClickView[0]) == '点击查看' and self.sum < self.max_sum:
            jid = self._job_id(JobsUrl)
            if jid is None:
                print('未找到职位ID,跳过投递', JobsUrl)
            else:
                SendUrl = f'https://www..****..com/index.php?m=&c=ajax_personal&a=resume_apply&jid={jid}'  # apply endpoint
                self.s.get(SendUrl)
                print('投递简历')
                # AfterSend re-reads the page, prints the contact data and counts the submission.
                self.AfterSend(JobsUrl)
                return
        self._print_contact(html)

    def AfterSend(self, JobsUrl):
        """Re-fetch the job page after a submission, print its contact data and count the submission.

        Args:
            JobsUrl: URL of the job detail page that was just applied to.
        """
        r = self.s.get(JobsUrl)
        html = etree.HTML(r.text)
        self._print_contact(html)
        self.sum += 1
if __name__ == "__main__":
    # Script entry point: build the scraper and start the crawl.
    xinyurc = XYrc()
    xinyurc.GetHome()