Make the list of ignored URLs smaller
This commit is contained in:
parent
dd3f82e623
commit
79164e2c46
6
grab.lua
6
grab.lua
|
@ -40,6 +40,12 @@ end
|
|||
|
||||
wget.callbacks.httploop_result = function(url, err, http_stat)
|
||||
io.stderr:write(http_stat["statcode"] .. url["url"] .. "\n")
|
||||
if string.match(url.url, "/fetch-comments/") then
|
||||
return
|
||||
end
|
||||
if string.match(url.url, "/r$") then
|
||||
return
|
||||
end
|
||||
if http_stat.statcode == 200 then
|
||||
table.insert(ign, url.url)
|
||||
end
|
||||
|
|
|
@ -164,7 +164,7 @@ class WgetArgs(object):
|
|||
'--tries', '10',
|
||||
'--span-hosts',
|
||||
'--waitretry', '0',
|
||||
'-w', '1',
|
||||
'-w', '0.1',
|
||||
'--random-wait',
|
||||
'--warc-file', ItemInterpolation('%(item_dir)s/%(warc_file_base)s'),
|
||||
'--warc-header', 'operator: TheTechRobo <thetechrobo@protonmail.ch>',
|
||||
|
|
Loading…
Reference in New Issue