response = requests.get( 'http://m.rigasin.com/forum.php',
headers=self.headers, # cookies=self.cookies, params=params
)
ret = Selector(response)
tid_l = ret.xpath('//*[@id="threadlisttableid"]/tbody[contains(@id,"normalthread")]/@id').extract()
if not tid_s:
tid_s = set()
old_count = len(tid_s)
for i in tid_l:
tid_s.add(i.replace("normalthread_", ""))
new_count = len(tid_s)
if new_count != old_count:
page += 1
return self.fid_to_tid(fid, page=page, tid_s=tid_s)
else:
return tid_s
def get_content(self, fid, tid):
response = requests.get( f"http://m.rigasin.com/forum.php?mod=viewthread&tid={tid}&extra=page%3D1",
headers=self.headers
)
if "如果您要查看本帖隱藏內(nèi)容請" in response.text:
self.post_content(fid, tid)
time.sleep(60)
return self.get_content(fid, tid)
else: # time.sleep(1) result = Selector(response)
url_info = result.xpath('//div[@class="showhide"]//text()').extract()
try:
bd_url = [i for i in url_info if "https://" in i][0]
bd_pwd = re.findall(r"\w\w\w\w", [i for i in url_info if "提取碼" in i][0])[0]