fix some bugs; more compatibility
This commit is contained in:
parent
8df93e1d56
commit
30ced3839c
|
@ -0,0 +1,69 @@
|
|||
names = []
|
||||
import bs4 as bs
|
||||
|
||||
import requests
|
||||
|
||||
def disco(page, timeout=30) -> str:
    """Fetch one page of the obituary listing and return the response body.

    Sends a form-encoded POST to the site's ``/pax/obitsrch`` endpoint with
    ``pg`` set to ``page`` and returns the body as text (the caller parses it
    as HTML).

    Args:
        page: Value sent as the ``pg`` form field.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Raising here stops an error page from being parsed as an
            (empty) listing, which would silently lose that page's entries.
        requests.RequestException: On connection failure or timeout.
    """
    # Browser-like request headers, sent as-is with every listing request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:101.0) Gecko/20100101 Firefox/101.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        # 'Accept-Encoding': 'gzip, deflate, br',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }

    # Form fields for the search endpoint; only 'pg' varies between calls.
    # NOTE(review): the meaning of the fixed fields (e.g. 'sids', 'typ') is
    # not visible here -- presumably captured from a browser session.
    data = {
        'pg': page,
        'term': '',
        'paginate': '1',
        'ym': '0',
        'showmiddlename': '0',
        'listcity': '0',
        'tgt': 'obitlist',
        'numlistings': '10',
        'sids': '10627',
        'typ': '1',
        'txtsrch': '0',
    }

    response = requests.post(
        'https://www.tharpsontheimerfh.com/pax/obitsrch',
        headers=headers,
        data=data,
        timeout=timeout,
    )
    response.raise_for_status()
    return response.text
|
||||
|
||||
# Walk the listing pages and collect the third "/"-separated component of
# every obituary link's href.
# NOTE(review): assumes hrefs look like "/tributes/<slug>" so that index 2 is
# the slug -- confirm against the live markup.
for i in range(0, 191):
    soup = bs.BeautifulSoup(disco(i), 'lxml')
    for link in soup.select(".obitlist-title a"):
        names.append(link.get('href').split("/")[2])
    print(i)  # progress indicator: index of the page just processed

# Build the space-separated item list. Every entry needs the
# "tharpsontheimerfh:" prefix; using the prefix as the join separator (as
# before) left the first entry without it, so that item did not match the
# "tharpsontheimerfh" item type.
name_ = " ".join("tharpsontheimerfh:" + name for name in names)
print(name_)
|
||||
|
||||
# Submit the collected items to the local tracker's admin "queues" form.
admin_url = 'http://localhost:8501/funeralhomes/admin/queues'

# NOTE(review): the Authorization value is a hard-coded Basic credential
# committed to the repository -- consider rotating it and loading it from
# the environment instead.
headers = {
    'Content-Type': 'multipart/form-data; boundary=---------------------------13612081884110273351021381409',
    'Origin': 'http://localhost:8501',
    'Authorization': 'Basic dGhldGVjaHJvYm86ZWxpdmVJU3RoZWJlc3Q0OQ==',
    'Connection': 'keep-alive',
    'Referer': admin_url,
    'Upgrade-Insecure-Requests': '1',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
}

# Hand-assembled multipart/form-data body, one form field per literal below
# (adjacent literals are concatenated into a single string). The "%s" in the
# "items" field is filled with the item list just before sending.
data = (
    '-----------------------------13612081884110273351021381409\r\nContent-Disposition: form-data; name="queue"\r\n\r\ntodo\r\n'
    '-----------------------------13612081884110273351021381409\r\nContent-Disposition: form-data; name="downloader"\r\n\r\n\r\n'
    '-----------------------------13612081884110273351021381409\r\nContent-Disposition: form-data; name="check"\r\n\r\nno\r\n'
    '-----------------------------13612081884110273351021381409\r\nContent-Disposition: form-data; name="items"\r\n\r\n%s\r\n'
    '-----------------------------13612081884110273351021381409\r\nContent-Disposition: form-data; name="items-file"; filename=""\r\nContent-Type: application/octet-stream\r\n\r\n'
    '-----------------------------13612081884110273351021381409--\r\n'
)

# Redirects are not followed; the Location header of the reply is printed
# instead.
payload = data % name_
reply = requests.post(admin_url, allow_redirects=False, headers=headers, data=payload)
print(reply.headers.get("Location"))
|
||||
|
||||
|
6
grab.lua
6
grab.lua
|
@ -52,8 +52,10 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
|
|||
local root=htmlparser.parse(data)
|
||||
local div =root("#obitsbarV31")
|
||||
assert(not div[2])
|
||||
local oid=div[1].attributes["data-oid"]
|
||||
table.insert(addedUrls, { url="https://www.tharpsontheimerfh.com/pax/obpgsnvn", post_data="sn=tributewall&oid=" .. oid})
|
||||
if div[1] then
|
||||
local oid=div[1].attributes["data-oid"]
|
||||
table.insert(addedUrls, { url="https://www.tharpsontheimerfh.com/pax/obpgsnvn", post_data="sn=tributewall&oid=" .. oid})
|
||||
end
|
||||
table.insert(addedUrls, { url="https://www.tharpsontheimerfh.com/pax/prnobit", post_data="ok=" .. ok .. "&fcf=0&bg=1"})
|
||||
end
|
||||
io.stderr:write(table.show(addedUrls, "Added URLs "))
|
||||
|
|
|
@ -36,7 +36,7 @@ project = Project(
|
|||
# It will be added to the WARC files and reported to the tracker.
|
||||
VERSION = '20220428.01'
|
||||
#USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36'
|
||||
TRACKER_ID = 'funeralhomestest'
|
||||
TRACKER_ID = 'funeralhomes'
|
||||
TRACKER_HOST = '172.17.0.1:8501'
|
||||
|
||||
WGET_AT = find_executable(
|
||||
|
@ -182,8 +182,10 @@ class WgetArgs(object):
|
|||
i_n = item_name.split(':')
|
||||
if i_n[0] == 'downsandson':
|
||||
item_name = [f'https://downsandsonfuneralhome.com/tribute/details/{i_n[1]}/Dr-Alex-Klym/obituary.html']
|
||||
if i_n[0] == 'tharpsontheimerfh':
|
||||
item_name = [f'https://www.tharpsontheimerfh.com/tributes/{i_n[1]}', f'https://www.tharpsontheimerfh.com/printnotice/{i_n[1]}/1o/1c/1q/0d/1b']
|
||||
elif i_n[0] == 'tharpsontheimerfh':
|
||||
item_name = [f'https://www.tharpsontheimerfh.com/tributes/{i_n[1]}', f'https://www.tharpsontheimerfh.com/printnotice/{i_n[1]}/1o/1c/1q/0d/1b', f'https://www.tharpsontheimerfh.com/tributes/{i_n[1]}/guest-book', f'https://www.tharpsontheimerfh.com/tributes/{i_n[1]}/photo-album', f'https://www.tharpsontheimerfh.com/tributes/{i_n[1]}/services']
|
||||
else:
|
||||
raise TypeError("bad item type")
|
||||
item_urls+=(item_name)
|
||||
wget_args+=(item_name)
|
||||
|
||||
|
|
Loading…
Reference in New Issue