local htmlparser = require "htmlparser"
require "table_show"

--- Read an entire file into a string.
-- @param file path of the file to read (opened in binary mode)
-- @return the complete file contents
-- Raises an error if the file cannot be opened.
function readAll(file)
  local f = assert(io.open(file, "rb"))
  local content = f:read("*all")
  f:close()
  return content
end

-- Flipped to true after the downsandsonfuneralhome.com obituary page has had
-- its links queued. It is not read anywhere in the code visible here.
QUEUED_URLS = false

--- Tell whether `text` begins with `prefix` (plain comparison, no patterns).
-- @param text string to inspect
-- @param prefix literal prefix to look for
-- @return true when `text` starts with `prefix`, false otherwise
function startswith(text, prefix)
  return text:find(prefix, 1, true) == 1
end

--- httploop_result callback: logs the HTTP status code immediately followed
-- by the URL (no separator) and a newline to stderr.
-- @param url table whose "url" field holds the URL string
-- @param err unused
-- @param http_stat table whose "statcode" field holds the status code
wget.callbacks.httploop_result = function(url, err, http_stat)
  io.stderr:write(http_stat["statcode"] .. url["url"] .. "\n")
end

local DOWNS_ORIGIN = "https://downsandsonfuneralhome.com"
local THARP_ORIGIN = "https://www.tharpsontheimerfh.com"

-- Run `selector` against the parsed document and return its single match
-- (or nil when nothing matches). Aborts if more than one element matches,
-- because the callers rely on the selector being unambiguous.
local function only_match(root, selector)
  local nodes = root(selector)
  assert(not nodes[2], "more than one element matches " .. selector)
  return nodes[1]
end

-- Like only_match, but the element and its href attribute must exist.
-- Returns the href string; aborts with a descriptive message otherwise.
local function required_href(root, selector)
  local node = only_match(root, selector)
  assert(node, "no element matches " .. selector)
  return assert(node.attributes.href, "no href attribute on " .. selector)
end

-- Queue the sub-pages linked from the downsandsonfuneralhome.com obituary page.
local function queue_downs_links(data, queue)
  local root = htmlparser.parse(data)
  io.stderr:write("Read root\n")

  -- make sure that there's only one element that fits the criteria
  -- (only_match / required_href enforce this for every selector below)
  local obituary = required_href(root, "#obituary-link-list-item a")
  local condolences = required_href(root, "#condolences-link-list-item a")
  local service = required_href(root, "#service-link-list-item a")
  local charities = required_href(root, "#charities-link-list-item a")
  local print_link = only_match(root, ".print-obit-btn a")

  -- NOTE(review): the memories link is checked for uniqueness but never
  -- queued; confirm whether leaving it out is intentional.
  only_match(root, "#memories-link-list-item a")

  queue[#queue + 1] = { url = DOWNS_ORIGIN .. obituary }
  queue[#queue + 1] = { url = DOWNS_ORIGIN .. condolences }
  queue[#queue + 1] = { url = DOWNS_ORIGIN .. service }
  queue[#queue + 1] = { url = DOWNS_ORIGIN .. charities }
  -- The print button is optional, so it is only queued when present.
  if print_link then
    queue[#queue + 1] = { url = DOWNS_ORIGIN .. print_link.attributes.href }
  end

  QUEUED_URLS = true
end

-- Queue the POST requests associated with a tharpsontheimerfh.com tribute page.
local function queue_tharp_requests(data, queue)
  -- The print-obituary request needs the item name from the environment.
  local item_name = assert(os.getenv("item_name"),
    "item_name environment variable is not set")
  local root = htmlparser.parse(data)
  local bar = only_match(root, "#obitsbarV31")

  if bar then
    local oid = assert(bar.attributes["data-oid"],
      "#obitsbarV31 has no data-oid attribute")
    queue[#queue + 1] = {
      url = THARP_ORIGIN .. "/pax/twshgal",
      post_data = "oid=" .. oid,
    }
    queue[#queue + 1] = {
      url = THARP_ORIGIN .. "/pax/obpgsnvn",
      post_data = "sn=tributewall&oid=" .. oid,
    }
  end

  queue[#queue + 1] = {
    url = THARP_ORIGIN .. "/pax/prnobit",
    post_data = "ok=" .. item_name .. "&fcf=0&bg=1",
  }
end

-- Queue the other pagination pages of a bestattung-muellner.at candle list.
local function queue_muellner_pages(data, queue)
  local root = htmlparser.parse(data)
  for _, node in ipairs(root(".pagination .inactive")) do
    local href = node.attributes.href
    -- Elements without an href carry no target, so they are skipped rather
    -- than producing an entry with no url.
    if href then
      queue[#queue + 1] = { url = href }
    end
  end
end

--- get_urls callback: reads the downloaded file and, depending on which site
-- pattern the URL matches, builds a list of additional requests.
-- @param file path of the downloaded file
-- @param url URL string of the page that was downloaded
-- @param is_css unused
-- @param iri unused
-- @return array of tables with a `url` field and, for POST requests, a
--         `post_data` field
wget.callbacks.get_urls = function(file, url, is_css, iri)
  local addedUrls = {}
  local data = readAll(file)
  io.stderr:write("Read data\n")

  if url:match("https://downsandsonfuneralhome%.com/tribute/details/[^/]+/Dr%-Alex%-Klym/obituary%.html") then
    queue_downs_links(data, addedUrls)
  end

  if url:match("https://www.tharpsontheimerfh.com/tributes/[^/]+/?$") then
    queue_tharp_requests(data, addedUrls)
  end

  -- "%?" matches a literal question mark; the earlier "%\?" spelling relied
  -- on an escape sequence that newer Lua versions reject.
  if url:match("^https://www.bestattung%-muellner.at/?.?.?/sterbefall/[^/]+/%?action=gedenkkerzen") then
    queue_muellner_pages(data, addedUrls)
  end

  io.stderr:write(table.show(addedUrls, "Added URLs"))
  return addedUrls
end