Add support for another funeral home, at pabs' request
This commit is contained in:
parent
5bdb49fb80
commit
1e36186c9b
9
grab.lua
9
grab.lua
|
@ -59,6 +59,13 @@ wget.callbacks.get_urls = function(file, url, is_css, iri)
|
|||
end
|
||||
table.insert(addedUrls, { url="https://www.tharpsontheimerfh.com/pax/prnobit", post_data="ok=" .. ok .. "&fcf=0&bg=1"})
|
||||
end
|
||||
io.stderr:write(table.show(addedUrls, "Added URLs "))
|
||||
if url:match("^https://www.bestattung%-muellner.at/?.?.?/sterbefall/[^/]+/%\?action=gedenkkerzen") then
|
||||
local root = htmlparser.parse(data)
|
||||
local otherPages = root(".pagination .inactive")
|
||||
for index, pagination in ipairs(otherPages) do
|
||||
table.insert(addedUrls, { url=pagination.attributes.href})
|
||||
end
|
||||
end
|
||||
io.stderr:write(table.show(addedUrls, "Added URLs"))
|
||||
return addedUrls
|
||||
end
|
||||
|
|
12
pipeline.py
12
pipeline.py
|
@ -36,7 +36,7 @@ project = Project(
|
|||
# It will be added to the WARC files and reported to the tracker.
|
||||
VERSION = '20220428.01'
|
||||
#USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36'
|
||||
TRACKER_ID = 'funeralhomes'
|
||||
TRACKER_ID = 'funeralhomestest'
|
||||
TRACKER_HOST = '172.17.0.1:8501'
|
||||
|
||||
WGET_AT = find_executable(
|
||||
|
@ -184,6 +184,16 @@ class WgetArgs(object):
|
|||
item_name = [f'https://downsandsonfuneralhome.com/tribute/details/{i_n[1]}/Dr-Alex-Klym/obituary.html']
|
||||
elif i_n[0] == 'tharpsontheimerfh':
|
||||
item_name = [f'https://www.tharpsontheimerfh.com/tributes/{i_n[1]}', f'https://www.tharpsontheimerfh.com/printnotice/{i_n[1]}/1o/1c/1q/0d/1b', f'https://www.tharpsontheimerfh.com/tributes/{i_n[1]}/guest-book', f'https://www.tharpsontheimerfh.com/tributes/{i_n[1]}/photo-album', f'https://www.tharpsontheimerfh.com/tributes/{i_n[1]}/services']
|
||||
elif i_n[0] == 'bestattung-mullner':
|
||||
ep = 'https://www.bestattung-muellner.at%s/sterbefall/%s/'
|
||||
eps = []
|
||||
for language in ('', '/en', '/sk'): # de, en, sk
|
||||
eps.append(ep % (language, i_n[1]))
|
||||
eps.append(ep % (language, i_n[1]) + '?action=parte')
|
||||
eps.append(ep % (language, i_n[1]) + '?action=sterbebild')
|
||||
eps.append(ep % (language, i_n[1]) + '?action=gedenkkerzen')
|
||||
eps.append(ep % (language, i_n[1]) + '?action=kondolenzbuch')
|
||||
item_name = eps
|
||||
else:
|
||||
raise TypeError("bad item type")
|
||||
item_urls+=(item_name)
|
||||
|
|
Loading…
Reference in New Issue