79 lines
2.1 KiB
Lua
79 lines
2.1 KiB
Lua
local htmlparser = require "htmlparser"
|
|
require "table_show"
|
|
|
|
--- Read a whole file into a single string.
-- The file is opened in binary mode. If it cannot be opened, the failure
-- from io.open is raised as an error through assert.
-- @param file path of the file to read
-- @return the complete contents of the file
function readAll(file)
  local handle = assert(io.open(file, "rb"))
  local contents = handle:read("*all")
  handle:close()
  return contents
end
|
|
|
|
-- Global flag, initialised to false. Nothing else in this file reads or
-- writes it. NOTE(review): possibly vestigial -- confirm before removing.
QUEUED_URLS = false
|
|
|
|
--- Split a string into its non-empty fields.
-- `sep` is placed verbatim inside a negated character class, so every
-- character in it acts as a delimiter and pattern-magic characters are NOT
-- escaped. Runs of consecutive delimiters yield no empty fields.
-- @param s the string to split
-- @param sep the delimiter character(s)
-- @return an array of the fields, in order of appearance
function split(s, sep)
  local pieces = {}
  local piece_pattern = string.format("([^%s]+)", sep)
  for piece in string.gmatch(s, piece_pattern) do
    pieces[#pieces + 1] = piece
  end
  return pieces
end
|
|
|
|
--- Report whether `text` begins with `prefix`.
-- The search is a plain-text find (no pattern matching), so magic
-- characters in `prefix` are compared literally.
-- @param text the string to inspect
-- @param prefix the leading substring to look for
-- @return true if `prefix` occurs at position 1 of `text`, false otherwise
function startswith(text, prefix)
  local first_hit = text:find(prefix, 1, true)
  if first_hit == 1 then
    return true
  end
  return false
end
|
|
|
|
--- Linear search of an array for a value.
-- Compares each element at indices 1..#list against `needle` with `==`.
-- @param list the array to search
-- @param needle the value to look for
-- @return true if some element equals `needle`, false otherwise
local function contains(list, needle)
  local idx, count = 1, #list
  while idx <= count do
    if list[idx] == needle then
      return true
    end
    idx = idx + 1
  end
  return false
end
|
|
|
|
--- download_child_p hook: veto URLs that are already recorded in `ign`.
-- `ign` is the global list loaded from alr_urls.txt at the bottom of this
-- file and extended by the httploop_result hook.
-- @param urlpos link descriptor; the candidate URL is read from urlpos.url.url
-- @param verdict the value returned unchanged when the URL is not in `ign`
-- @return false for a URL found in `ign`, otherwise `verdict`
-- The remaining parameters (parent, depth, start_url_parsed, iri, reason)
-- are not used here.
wget.callbacks.download_child_p = function(urlpos, parent, depth, start_url_parsed, iri, verdict, reason)
  local candidate = urlpos.url.url
  if not contains(ign, candidate) then
    return verdict
  end
  print("Skipping already-retrieved URL " .. candidate)
  return false
end
|
|
|
|
--- httploop_result hook: log each result and remember successful URLs.
-- Writes the status code immediately followed by the URL (no separator)
-- and a newline to stderr. A URL is appended to the global `ign` list when
-- the status code is 200, unless the URL contains "/fetch-comments?" or
-- ends in "/r".
-- @param url table whose `url` field is the URL string
-- @param err unused here
-- @param http_stat table whose `statcode` field is the HTTP status code
-- @return nothing
wget.callbacks.httploop_result = function(url, err, http_stat)
  local status = http_stat.statcode
  local fetched = url.url
  io.stderr:write(status .. fetched .. "\n")

  -- Comment-page fetches and URLs ending in "/r" are never recorded.
  local excluded = string.match(fetched, "/fetch%-comments%?")
    or string.match(fetched, "/r$")
  if not excluded and status == 200 then
    table.insert(ign, fetched)
  end
end
|
|
|
|
--- get_urls hook: queue the next page of comments after a comment page.
-- When `url` is a strawpoll.me fetch-comments URL, the global `page`
-- counter is incremented and two URLs for that page are returned: the plain
-- one and a variant with a fixed "&_=" query suffix. For any other URL the
-- returned list is empty. The list is also dumped to stderr via table.show.
--
-- The downloaded file is no longer read here: its contents were never
-- used, so the read was wasted I/O on every response.
--
-- @param file path of the downloaded file (unused)
-- @param url the URL that was just retrieved
-- @param is_css unused here
-- @param iri unused here
-- @return array of tables of the form { url = <string> }
wget.callbacks.get_urls = function(file, url, is_css, iri)
  local addedUrls = {}

  -- NOTE(review): the pattern ends in `page=.$`, so only single-character
  -- page values match; a URL ending in "page=10" will not queue page 11.
  -- Confirm whether this cap is intended before widening it, since nothing
  -- else in this hook stops the pagination.
  if url:match("https://www%.strawpoll%.me/[^/]+/fetch%-comments%?page=.$") then
    -- Fail with a clear message, and before touching the page counter,
    -- if the item name was not supplied through the environment.
    local item_name = assert(os.getenv("item_name"),
      "item_name environment variable is not set")
    page = page + 1
    local next_page = "https://www.strawpoll.me/" .. item_name
      .. "/fetch-comments?page=" .. page
    table.insert(addedUrls, { url = next_page })
    table.insert(addedUrls, { url = next_page .. "&_=1653344833516" })
  end

  io.stderr:write(table.show(addedUrls, "Added URLs "))
  return addedUrls
end
|
|
|
|
--- finish hook: persist the list of retrieved URLs.
-- Writes the entries of the global `ign` list, one per line, to
-- alr_urls.txt inside the directory named by the item_dir environment
-- variable, then prints "Wrote data.".
--
-- The file is written through its own handle rather than by redirecting
-- the process-wide default output, and a failed open is reported on stderr
-- instead of silently sending the list to the current default output.
--
-- None of the hook's parameters are used.
-- @return nothing
wget.callbacks.finish = function(start_time, end_time, wall_time, numurls, total_downloaded_bytes, total_download_time)
  local path = os.getenv("item_dir") .. "/alr_urls.txt"
  local out, open_err = io.open(path, "w")
  if not out then
    io.stderr:write("Could not open " .. path .. " for writing: "
      .. tostring(open_err) .. "\n")
    return
  end
  out:write(table.concat(ign, "\n"))
  out:close()
  print("Wrote data.")
end
|
|
|
|
-- Load-time initialisation of the globals used by the hooks above.
-- `ign` is seeded with the non-empty lines of alr_urls.txt in item_dir (the
-- same file the finish hook writes). readAll raises an error if that file
-- cannot be opened, so it must exist when this script is loaded.
local alr_urls_path = os.getenv("item_dir") .. "/alr_urls.txt"
ign = split(readAll(alr_urls_path), "\n")

-- Comment-page counter; the get_urls hook increments it before queueing.
page = 1
|