funeralhomes-grab/grab.lua

72 lines
2.8 KiB
Lua

local htmlparser = require "htmlparser"
require "table_show"
--- Read a whole file into memory and return it as one string.
-- The file is opened in binary mode ("rb").
-- @param file path of the file to read
-- @return the complete contents of the file
-- Raises (via assert) with io.open's message when the file cannot be opened.
function readAll(file)
  local handle = assert(io.open(file, "rb"))
  local contents = handle:read("*all")
  handle:close()
  return contents
end
-- Global flag: starts out false and is set to true by the get_urls callback
-- once it has queued the links from a downsandsonfuneralhome.com obituary page.
-- Nothing in the visible part of this file reads it.
QUEUED_URLS = false
--- Report whether `text` begins with `prefix`.
-- The prefix is searched as plain text (no Lua pattern matching), so magic
-- characters such as "." or "%" are compared literally.
-- @param text string to inspect
-- @param prefix literal prefix to look for
-- @return true when `prefix` occurs at position 1 of `text`, false otherwise
function startswith(text, prefix)
  local first_hit = text:find(prefix, 1, true)
  return first_hit == 1
end
--- Callback registered as wget.callbacks.httploop_result.
-- Logs one line to stderr made of the status code immediately followed by the
-- URL (no separator between them) and a newline. Returns nothing.
-- @param url table; its `url` field is the string that gets logged
-- @param err unused here
-- @param http_stat table; its `statcode` field is the value that gets logged
wget.callbacks.httploop_result = function(url, err, http_stat)
  local log_line = http_stat.statcode .. url.url .. "\n"
  io.stderr:write(log_line)
end
--- Return the only element matching `selector` under `root`, or nil if none.
-- Raises when the selector matches more than one element, because each
-- selector queried this way is expected to identify a single node.
local function select_unique(root, selector)
  local matches = root(selector)
  assert(not matches[2], "more than one element matches " .. selector)
  return matches[1]
end

--- Queue the sub-pages linked from a downsandsonfuneralhome.com obituary page.
-- The obituary, condolences, service and charities links are required (an
-- error is raised if one is missing); the print link is optional.
local function queue_downs_links(root, added)
  local base = "https://downsandsonfuneralhome.com"
  local required_selectors = {
    "#obituary-link-list-item a",
    "#condolences-link-list-item a",
    "#service-link-list-item a",
    "#charities-link-list-item a",
  }

  local required_links = {}
  for i, selector in ipairs(required_selectors) do
    required_links[i] = assert(select_unique(root, selector),
      "required link not found: " .. selector)
  end

  -- NOTE(review): the memories link is checked for uniqueness but has never
  -- been queued by this script; confirm whether that omission is intentional.
  select_unique(root, "#memories-link-list-item a")
  local print_link = select_unique(root, ".print-obit-btn a")

  for _, link in ipairs(required_links) do
    table.insert(added, { url = base .. link.attributes.href })
  end
  if print_link then
    table.insert(added, { url = base .. print_link.attributes.href })
  end
end

--- Queue the POST requests that belong to a tharpsontheimerfh.com tribute page.
-- Two requests are keyed by the `data-oid` attribute of #obitsbarV31 and are
-- only queued when that element exists; the print request is keyed by the
-- `item_name` environment variable and is always queued.
local function queue_tharp_requests(root, added)
  local base = "https://www.tharpsontheimerfh.com/pax/"
  -- Fail with a readable message instead of a nil-concatenation error.
  local item_name = assert(os.getenv("item_name"),
    "item_name environment variable is not set")

  local bar = select_unique(root, "#obitsbarV31")
  if bar then
    local oid = bar.attributes["data-oid"]
    table.insert(added, { url = base .. "twshgal", post_data = "oid=" .. oid })
    table.insert(added, { url = base .. "obpgsnvn", post_data = "sn=tributewall&oid=" .. oid })
  end
  table.insert(added, { url = base .. "prnobit", post_data = "ok=" .. item_name .. "&fcf=0&bg=1" })
end

--- Queue the other pagination pages of a bestattung-muellner.at candle listing.
-- The href is queued exactly as it appears in the page.
-- NOTE(review): hrefs are not resolved against the site root; confirm they are absolute.
local function queue_muellner_pages(root, added)
  for _, pagination in ipairs(root(".pagination .inactive")) do
    local href = pagination.attributes.href
    -- Elements without an href would produce an entry with no url; skip them.
    if href then
      table.insert(added, { url = href })
    end
  end
end

--- Callback registered as wget.callbacks.get_urls.
-- Reads the downloaded file, and when `url` matches one of the supported
-- funeral-home page patterns, parses the HTML and collects extra requests.
-- @param file path of the downloaded document on disk
-- @param url URL string of the downloaded document
-- @param is_css unused here
-- @param iri unused here
-- @return list of tables, each with a `url` field and optionally `post_data`
-- Raises when a page does not have the expected structure (duplicate or
-- missing required elements) or when `item_name` is unset for a tribute page.
wget.callbacks.get_urls = function(file, url, is_css, iri)
  local addedUrls = {}
  local data = readAll(file)
  io.stderr:write("Read data\n")

  if url:match("https://downsandsonfuneralhome%.com/tribute/details/[^/]+/Dr%-Alex%-Klym/obituary%.html") then
    local root = htmlparser.parse(data)
    io.stderr:write("Read root\n")
    queue_downs_links(root, addedUrls)
    QUEUED_URLS = true
  end

  -- Dots in the host name are escaped so they only match a literal ".".
  if url:match("https://www%.tharpsontheimerfh%.com/tributes/[^/]+/?$") then
    queue_tharp_requests(htmlparser.parse(data), addedUrls)
  end

  -- "%?" matches a literal question mark; the earlier "%\?" spelling relied on
  -- an invalid string escape that Lua 5.2+ rejects at compile time.
  -- The ".?.?" part stays a wildcard for up to two characters in the path.
  if url:match("^https://www%.bestattung%-muellner%.at/?.?.?/sterbefall/[^/]+/%?action=gedenkkerzen") then
    queue_muellner_pages(htmlparser.parse(data), addedUrls)
  end

  io.stderr:write(table.show(addedUrls, "Added URLs"))
  return addedUrls
end