diff --git a/JSON.lua b/JSON.lua deleted file mode 100644 index 5f11425..0000000 --- a/JSON.lua +++ /dev/null @@ -1,1053 +0,0 @@ --- -*- coding: utf-8 -*- --- --- Simple JSON encoding and decoding in pure Lua. --- --- Copyright 2010-2014 Jeffrey Friedl --- http://regex.info/blog/ --- --- Latest version: http://regex.info/blog/lua/json --- --- This code is released under a Creative Commons CC-BY "Attribution" License: --- http://creativecommons.org/licenses/by/3.0/deed.en_US --- --- It can be used for any purpose so long as the copyright notice above, --- the web-page links above, and the 'AUTHOR_NOTE' string below are --- maintained. Enjoy. --- -local VERSION = 20141223.14 -- version history at end of file -local AUTHOR_NOTE = "-[ JSON.lua package by Jeffrey Friedl (http://regex.info/blog/lua/json) version 20141223.14 ]-" - --- --- The 'AUTHOR_NOTE' variable exists so that information about the source --- of the package is maintained even in compiled versions. It's also --- included in OBJDEF below mostly to quiet warnings about unused variables. --- -local OBJDEF = { - VERSION = VERSION, - AUTHOR_NOTE = AUTHOR_NOTE, -} - - --- --- Simple JSON encoding and decoding in pure Lua. --- http://www.json.org/ --- --- --- JSON = assert(loadfile "JSON.lua")() -- one-time load of the routines --- --- local lua_value = JSON:decode(raw_json_text) --- --- local raw_json_text = JSON:encode(lua_table_or_value) --- local pretty_json_text = JSON:encode_pretty(lua_table_or_value) -- "pretty printed" version for human readability --- --- --- --- DECODING (from a JSON string to a Lua table) --- --- --- JSON = assert(loadfile "JSON.lua")() -- one-time load of the routines --- --- local lua_value = JSON:decode(raw_json_text) --- --- If the JSON text is for an object or an array, e.g. --- { "what": "books", "count": 3 } --- or --- [ "Larry", "Curly", "Moe" ] --- --- the result is a Lua table, e.g. --- { what = "books", count = 3 } --- or --- { "Larry", "Curly", "Moe" } --- --- --- The encode and decode routines accept an optional second argument, --- "etc", which is not used during encoding or decoding, but upon error --- is passed along to error handlers. It can be of any type (including nil). --- --- --- --- ERROR HANDLING --- --- With most errors during decoding, this code calls --- --- JSON:onDecodeError(message, text, location, etc) --- --- with a message about the error, and if known, the JSON text being --- parsed and the byte count where the problem was discovered. You can --- replace the default JSON:onDecodeError() with your own function. --- --- The default onDecodeError() merely augments the message with data --- about the text and the location if known (and if a second 'etc' --- argument had been provided to decode(), its value is tacked onto the --- message as well), and then calls JSON.assert(), which itself defaults --- to Lua's built-in assert(), and can also be overridden. --- --- For example, in an Adobe Lightroom plugin, you might use something like --- --- function JSON:onDecodeError(message, text, location, etc) --- LrErrors.throwUserError("Internal Error: invalid JSON data") --- end --- --- or even just --- --- function JSON.assert(message) --- LrErrors.throwUserError("Internal Error: " .. message) --- end --- --- If JSON:decode() is passed a nil, this is called instead: --- --- JSON:onDecodeOfNilError(message, nil, nil, etc) --- --- and if JSON:decode() is passed HTML instead of JSON, this is called: --- --- JSON:onDecodeOfHTMLError(message, text, nil, etc) --- --- The use of the fourth 'etc' argument allows stronger coordination --- between decoding and error reporting, especially when you provide your --- own error-handling routines. Continuing with the the Adobe Lightroom --- plugin example: --- --- function JSON:onDecodeError(message, text, location, etc) --- local note = "Internal Error: invalid JSON data" --- if type(etc) = 'table' and etc.photo then --- note = note .. " while processing for " .. etc.photo:getFormattedMetadata('fileName') --- end --- LrErrors.throwUserError(note) --- end --- --- : --- : --- --- for i, photo in ipairs(photosToProcess) do --- : --- : --- local data = JSON:decode(someJsonText, { photo = photo }) --- : --- : --- end --- --- --- --- --- --- DECODING AND STRICT TYPES --- --- Because both JSON objects and JSON arrays are converted to Lua tables, --- it's not normally possible to tell which original JSON type a --- particular Lua table was derived from, or guarantee decode-encode --- round-trip equivalency. --- --- However, if you enable strictTypes, e.g. --- --- JSON = assert(loadfile "JSON.lua")() --load the routines --- JSON.strictTypes = true --- --- then the Lua table resulting from the decoding of a JSON object or --- JSON array is marked via Lua metatable, so that when re-encoded with --- JSON:encode() it ends up as the appropriate JSON type. --- --- (This is not the default because other routines may not work well with --- tables that have a metatable set, for example, Lightroom API calls.) --- --- --- ENCODING (from a lua table to a JSON string) --- --- JSON = assert(loadfile "JSON.lua")() -- one-time load of the routines --- --- local raw_json_text = JSON:encode(lua_table_or_value) --- local pretty_json_text = JSON:encode_pretty(lua_table_or_value) -- "pretty printed" version for human readability --- local custom_pretty = JSON:encode(lua_table_or_value, etc, { pretty = true, indent = "| ", align_keys = false }) --- --- On error during encoding, this code calls: --- --- JSON:onEncodeError(message, etc) --- --- which you can override in your local JSON object. --- --- The 'etc' in the error call is the second argument to encode() --- and encode_pretty(), or nil if it wasn't provided. --- --- --- PRETTY-PRINTING --- --- An optional third argument, a table of options, allows a bit of --- configuration about how the encoding takes place: --- --- pretty = JSON:encode(val, etc, { --- pretty = true, -- if false, no other options matter --- indent = " ", -- this provides for a three-space indent per nesting level --- align_keys = false, -- see below --- }) --- --- encode() and encode_pretty() are identical except that encode_pretty() --- provides a default options table if none given in the call: --- --- { pretty = true, align_keys = false, indent = " " } --- --- For example, if --- --- JSON:encode(data) --- --- produces: --- --- {"city":"Kyoto","climate":{"avg_temp":16,"humidity":"high","snowfall":"minimal"},"country":"Japan","wards":11} --- --- then --- --- JSON:encode_pretty(data) --- --- produces: --- --- { --- "city": "Kyoto", --- "climate": { --- "avg_temp": 16, --- "humidity": "high", --- "snowfall": "minimal" --- }, --- "country": "Japan", --- "wards": 11 --- } --- --- The following three lines return identical results: --- JSON:encode_pretty(data) --- JSON:encode_pretty(data, nil, { pretty = true, align_keys = false, indent = " " }) --- JSON:encode (data, nil, { pretty = true, align_keys = false, indent = " " }) --- --- An example of setting your own indent string: --- --- JSON:encode_pretty(data, nil, { pretty = true, indent = "| " }) --- --- produces: --- --- { --- | "city": "Kyoto", --- | "climate": { --- | | "avg_temp": 16, --- | | "humidity": "high", --- | | "snowfall": "minimal" --- | }, --- | "country": "Japan", --- | "wards": 11 --- } --- --- An example of setting align_keys to true: --- --- JSON:encode_pretty(data, nil, { pretty = true, indent = " ", align_keys = true }) --- --- produces: --- --- { --- "city": "Kyoto", --- "climate": { --- "avg_temp": 16, --- "humidity": "high", --- "snowfall": "minimal" --- }, --- "country": "Japan", --- "wards": 11 --- } --- --- which I must admit is kinda ugly, sorry. This was the default for --- encode_pretty() prior to version 20141223.14. --- --- --- AMBIGUOUS SITUATIONS DURING THE ENCODING --- --- During the encode, if a Lua table being encoded contains both string --- and numeric keys, it fits neither JSON's idea of an object, nor its --- idea of an array. To get around this, when any string key exists (or --- when non-positive numeric keys exist), numeric keys are converted to --- strings. --- --- For example, --- JSON:encode({ "one", "two", "three", SOMESTRING = "some string" })) --- produces the JSON object --- {"1":"one","2":"two","3":"three","SOMESTRING":"some string"} --- --- To prohibit this conversion and instead make it an error condition, set --- JSON.noKeyConversion = true --- - - - - --- --- SUMMARY OF METHODS YOU CAN OVERRIDE IN YOUR LOCAL LUA JSON OBJECT --- --- assert --- onDecodeError --- onDecodeOfNilError --- onDecodeOfHTMLError --- onEncodeError --- --- If you want to create a separate Lua JSON object with its own error handlers, --- you can reload JSON.lua or use the :new() method. --- ---------------------------------------------------------------------------- - -local default_pretty_indent = " " -local default_pretty_options = { pretty = true, align_keys = false, indent = default_pretty_indent } - -local isArray = { __tostring = function() return "JSON array" end } isArray.__index = isArray -local isObject = { __tostring = function() return "JSON object" end } isObject.__index = isObject - - -function OBJDEF:newArray(tbl) - return setmetatable(tbl or {}, isArray) -end - -function OBJDEF:newObject(tbl) - return setmetatable(tbl or {}, isObject) -end - -local function unicode_codepoint_as_utf8(codepoint) - -- - -- codepoint is a number - -- - if codepoint <= 127 then - return string.char(codepoint) - - elseif codepoint <= 2047 then - -- - -- 110yyyxx 10xxxxxx <-- useful notation from http://en.wikipedia.org/wiki/Utf8 - -- - local highpart = math.floor(codepoint / 0x40) - local lowpart = codepoint - (0x40 * highpart) - return string.char(0xC0 + highpart, - 0x80 + lowpart) - - elseif codepoint <= 65535 then - -- - -- 1110yyyy 10yyyyxx 10xxxxxx - -- - local highpart = math.floor(codepoint / 0x1000) - local remainder = codepoint - 0x1000 * highpart - local midpart = math.floor(remainder / 0x40) - local lowpart = remainder - 0x40 * midpart - - highpart = 0xE0 + highpart - midpart = 0x80 + midpart - lowpart = 0x80 + lowpart - - -- - -- Check for an invalid character (thanks Andy R. at Adobe). - -- See table 3.7, page 93, in http://www.unicode.org/versions/Unicode5.2.0/ch03.pdf#G28070 - -- - if ( highpart == 0xE0 and midpart < 0xA0 ) or - ( highpart == 0xED and midpart > 0x9F ) or - ( highpart == 0xF0 and midpart < 0x90 ) or - ( highpart == 0xF4 and midpart > 0x8F ) - then - return "?" - else - return string.char(highpart, - midpart, - lowpart) - end - - else - -- - -- 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx - -- - local highpart = math.floor(codepoint / 0x40000) - local remainder = codepoint - 0x40000 * highpart - local midA = math.floor(remainder / 0x1000) - remainder = remainder - 0x1000 * midA - local midB = math.floor(remainder / 0x40) - local lowpart = remainder - 0x40 * midB - - return string.char(0xF0 + highpart, - 0x80 + midA, - 0x80 + midB, - 0x80 + lowpart) - end -end - -function OBJDEF:onDecodeError(message, text, location, etc) - if text then - if location then - message = string.format("%s at char %d of: %s", message, location, text) - else - message = string.format("%s: %s", message, text) - end - end - - if etc ~= nil then - message = message .. " (" .. OBJDEF:encode(etc) .. ")" - end - - if self.assert then - self.assert(false, message) - else - assert(false, message) - end -end - -OBJDEF.onDecodeOfNilError = OBJDEF.onDecodeError -OBJDEF.onDecodeOfHTMLError = OBJDEF.onDecodeError - -function OBJDEF:onEncodeError(message, etc) - if etc ~= nil then - message = message .. " (" .. OBJDEF:encode(etc) .. ")" - end - - if self.assert then - self.assert(false, message) - else - assert(false, message) - end -end - -local function grok_number(self, text, start, etc) - -- - -- Grab the integer part - -- - local integer_part = text:match('^-?[1-9]%d*', start) - or text:match("^-?0", start) - - if not integer_part then - self:onDecodeError("expected number", text, start, etc) - end - - local i = start + integer_part:len() - - -- - -- Grab an optional decimal part - -- - local decimal_part = text:match('^%.%d+', i) or "" - - i = i + decimal_part:len() - - -- - -- Grab an optional exponential part - -- - local exponent_part = text:match('^[eE][-+]?%d+', i) or "" - - i = i + exponent_part:len() - - local full_number_text = integer_part .. decimal_part .. exponent_part - local as_number = tonumber(full_number_text) - - if not as_number then - self:onDecodeError("bad number", text, start, etc) - end - - return as_number, i -end - - -local function grok_string(self, text, start, etc) - - if text:sub(start,start) ~= '"' then - self:onDecodeError("expected string's opening quote", text, start, etc) - end - - local i = start + 1 -- +1 to bypass the initial quote - local text_len = text:len() - local VALUE = "" - while i <= text_len do - local c = text:sub(i,i) - if c == '"' then - return VALUE, i + 1 - end - if c ~= '\\' then - VALUE = VALUE .. c - i = i + 1 - elseif text:match('^\\b', i) then - VALUE = VALUE .. "\b" - i = i + 2 - elseif text:match('^\\f', i) then - VALUE = VALUE .. "\f" - i = i + 2 - elseif text:match('^\\n', i) then - VALUE = VALUE .. "\n" - i = i + 2 - elseif text:match('^\\r', i) then - VALUE = VALUE .. "\r" - i = i + 2 - elseif text:match('^\\t', i) then - VALUE = VALUE .. "\t" - i = i + 2 - else - local hex = text:match('^\\u([0123456789aAbBcCdDeEfF][0123456789aAbBcCdDeEfF][0123456789aAbBcCdDeEfF][0123456789aAbBcCdDeEfF])', i) - if hex then - i = i + 6 -- bypass what we just read - - -- We have a Unicode codepoint. It could be standalone, or if in the proper range and - -- followed by another in a specific range, it'll be a two-code surrogate pair. - local codepoint = tonumber(hex, 16) - if codepoint >= 0xD800 and codepoint <= 0xDBFF then - -- it's a hi surrogate... see whether we have a following low - local lo_surrogate = text:match('^\\u([dD][cdefCDEF][0123456789aAbBcCdDeEfF][0123456789aAbBcCdDeEfF])', i) - if lo_surrogate then - i = i + 6 -- bypass the low surrogate we just read - codepoint = 0x2400 + (codepoint - 0xD800) * 0x400 + tonumber(lo_surrogate, 16) - else - -- not a proper low, so we'll just leave the first codepoint as is and spit it out. - end - end - VALUE = VALUE .. unicode_codepoint_as_utf8(codepoint) - - else - - -- just pass through what's escaped - VALUE = VALUE .. text:match('^\\(.)', i) - i = i + 2 - end - end - end - - self:onDecodeError("unclosed string", text, start, etc) -end - -local function skip_whitespace(text, start) - - local _, match_end = text:find("^[ \n\r\t]+", start) -- [http://www.ietf.org/rfc/rfc4627.txt] Section 2 - if match_end then - return match_end + 1 - else - return start - end -end - -local grok_one -- assigned later - -local function grok_object(self, text, start, etc) - if text:sub(start,start) ~= '{' then - self:onDecodeError("expected '{'", text, start, etc) - end - - local i = skip_whitespace(text, start + 1) -- +1 to skip the '{' - - local VALUE = self.strictTypes and self:newObject { } or { } - - if text:sub(i,i) == '}' then - return VALUE, i + 1 - end - local text_len = text:len() - while i <= text_len do - local key, new_i = grok_string(self, text, i, etc) - - i = skip_whitespace(text, new_i) - - if text:sub(i, i) ~= ':' then - self:onDecodeError("expected colon", text, i, etc) - end - - i = skip_whitespace(text, i + 1) - - local new_val, new_i = grok_one(self, text, i) - - VALUE[key] = new_val - - -- - -- Expect now either '}' to end things, or a ',' to allow us to continue. - -- - i = skip_whitespace(text, new_i) - - local c = text:sub(i,i) - - if c == '}' then - return VALUE, i + 1 - end - - if text:sub(i, i) ~= ',' then - self:onDecodeError("expected comma or '}'", text, i, etc) - end - - i = skip_whitespace(text, i + 1) - end - - self:onDecodeError("unclosed '{'", text, start, etc) -end - -local function grok_array(self, text, start, etc) - if text:sub(start,start) ~= '[' then - self:onDecodeError("expected '['", text, start, etc) - end - - local i = skip_whitespace(text, start + 1) -- +1 to skip the '[' - local VALUE = self.strictTypes and self:newArray { } or { } - if text:sub(i,i) == ']' then - return VALUE, i + 1 - end - - local VALUE_INDEX = 1 - - local text_len = text:len() - while i <= text_len do - local val, new_i = grok_one(self, text, i) - - -- can't table.insert(VALUE, val) here because it's a no-op if val is nil - VALUE[VALUE_INDEX] = val - VALUE_INDEX = VALUE_INDEX + 1 - - i = skip_whitespace(text, new_i) - - -- - -- Expect now either ']' to end things, or a ',' to allow us to continue. - -- - local c = text:sub(i,i) - if c == ']' then - return VALUE, i + 1 - end - if text:sub(i, i) ~= ',' then - self:onDecodeError("expected comma or '['", text, i, etc) - end - i = skip_whitespace(text, i + 1) - end - self:onDecodeError("unclosed '['", text, start, etc) -end - - -grok_one = function(self, text, start, etc) - -- Skip any whitespace - start = skip_whitespace(text, start) - - if start > text:len() then - self:onDecodeError("unexpected end of string", text, nil, etc) - end - - if text:find('^"', start) then - return grok_string(self, text, start, etc) - - elseif text:find('^[-0123456789 ]', start) then - return grok_number(self, text, start, etc) - - elseif text:find('^%{', start) then - return grok_object(self, text, start, etc) - - elseif text:find('^%[', start) then - return grok_array(self, text, start, etc) - - elseif text:find('^true', start) then - return true, start + 4 - - elseif text:find('^false', start) then - return false, start + 5 - - elseif text:find('^null', start) then - return nil, start + 4 - - else - self:onDecodeError("can't parse JSON", text, start, etc) - end -end - -function OBJDEF:decode(text, etc) - if type(self) ~= 'table' or self.__index ~= OBJDEF then - OBJDEF:onDecodeError("JSON:decode must be called in method format", nil, nil, etc) - end - - if text == nil then - self:onDecodeOfNilError(string.format("nil passed to JSON:decode()"), nil, nil, etc) - elseif type(text) ~= 'string' then - self:onDecodeError(string.format("expected string argument to JSON:decode(), got %s", type(text)), nil, nil, etc) - end - - if text:match('^%s*$') then - return nil - end - - if text:match('^%s*<') then - -- Can't be JSON... we'll assume it's HTML - self:onDecodeOfHTMLError(string.format("html passed to JSON:decode()"), text, nil, etc) - end - - -- - -- Ensure that it's not UTF-32 or UTF-16. - -- Those are perfectly valid encodings for JSON (as per RFC 4627 section 3), - -- but this package can't handle them. - -- - if text:sub(1,1):byte() == 0 or (text:len() >= 2 and text:sub(2,2):byte() == 0) then - self:onDecodeError("JSON package groks only UTF-8, sorry", text, nil, etc) - end - - local success, value = pcall(grok_one, self, text, 1, etc) - - if success then - return value - else - -- if JSON:onDecodeError() didn't abort out of the pcall, we'll have received the error message here as "value", so pass it along as an assert. - if self.assert then - self.assert(false, value) - else - assert(false, value) - end - -- and if we're still here, return a nil and throw the error message on as a second arg - return nil, value - end -end - -local function backslash_replacement_function(c) - if c == "\n" then - return "\\n" - elseif c == "\r" then - return "\\r" - elseif c == "\t" then - return "\\t" - elseif c == "\b" then - return "\\b" - elseif c == "\f" then - return "\\f" - elseif c == '"' then - return '\\"' - elseif c == '\\' then - return '\\\\' - else - return string.format("\\u%04x", c:byte()) - end -end - -local chars_to_be_escaped_in_JSON_string - = '[' - .. '"' -- class sub-pattern to match a double quote - .. '%\\' -- class sub-pattern to match a backslash - .. '%z' -- class sub-pattern to match a null - .. '\001' .. '-' .. '\031' -- class sub-pattern to match control characters - .. ']' - -local function json_string_literal(value) - local newval = value:gsub(chars_to_be_escaped_in_JSON_string, backslash_replacement_function) - return '"' .. newval .. '"' -end - -local function object_or_array(self, T, etc) - -- - -- We need to inspect all the keys... if there are any strings, we'll convert to a JSON - -- object. If there are only numbers, it's a JSON array. - -- - -- If we'll be converting to a JSON object, we'll want to sort the keys so that the - -- end result is deterministic. - -- - local string_keys = { } - local number_keys = { } - local number_keys_must_be_strings = false - local maximum_number_key - - for key in pairs(T) do - if type(key) == 'string' then - table.insert(string_keys, key) - elseif type(key) == 'number' then - table.insert(number_keys, key) - if key <= 0 or key >= math.huge then - number_keys_must_be_strings = true - elseif not maximum_number_key or key > maximum_number_key then - maximum_number_key = key - end - else - self:onEncodeError("can't encode table with a key of type " .. type(key), etc) - end - end - - if #string_keys == 0 and not number_keys_must_be_strings then - -- - -- An empty table, or a numeric-only array - -- - if #number_keys > 0 then - return nil, maximum_number_key -- an array - elseif tostring(T) == "JSON array" then - return nil - elseif tostring(T) == "JSON object" then - return { } - else - -- have to guess, so we'll pick array, since empty arrays are likely more common than empty objects - return nil - end - end - - table.sort(string_keys) - - local map - if #number_keys > 0 then - -- - -- If we're here then we have either mixed string/number keys, or numbers inappropriate for a JSON array - -- It's not ideal, but we'll turn the numbers into strings so that we can at least create a JSON object. - -- - - if self.noKeyConversion then - self:onEncodeError("a table with both numeric and string keys could be an object or array; aborting", etc) - end - - -- - -- Have to make a shallow copy of the source table so we can remap the numeric keys to be strings - -- - map = { } - for key, val in pairs(T) do - map[key] = val - end - - table.sort(number_keys) - - -- - -- Throw numeric keys in there as strings - -- - for _, number_key in ipairs(number_keys) do - local string_key = tostring(number_key) - if map[string_key] == nil then - table.insert(string_keys , string_key) - map[string_key] = T[number_key] - else - self:onEncodeError("conflict converting table with mixed-type keys into a JSON object: key " .. number_key .. " exists both as a string and a number.", etc) - end - end - end - - return string_keys, nil, map -end - --- --- Encode --- --- 'options' is nil, or a table with possible keys: --- pretty -- if true, return a pretty-printed version --- indent -- a string (usually of spaces) used to indent each nested level --- align_keys -- if true, align all the keys when formatting a table --- -local encode_value -- must predeclare because it calls itself -function encode_value(self, value, parents, etc, options, indent) - - if value == nil then - return 'null' - - elseif type(value) == 'string' then - return json_string_literal(value) - - elseif type(value) == 'number' then - if value ~= value then - -- - -- NaN (Not a Number). - -- JSON has no NaN, so we have to fudge the best we can. This should really be a package option. - -- - return "null" - elseif value >= math.huge then - -- - -- Positive infinity. JSON has no INF, so we have to fudge the best we can. This should - -- really be a package option. Note: at least with some implementations, positive infinity - -- is both ">= math.huge" and "<= -math.huge", which makes no sense but that's how it is. - -- Negative infinity is properly "<= -math.huge". So, we must be sure to check the ">=" - -- case first. - -- - return "1e+9999" - elseif value <= -math.huge then - -- - -- Negative infinity. - -- JSON has no INF, so we have to fudge the best we can. This should really be a package option. - -- - return "-1e+9999" - else - return tostring(value) - end - - elseif type(value) == 'boolean' then - return tostring(value) - - elseif type(value) ~= 'table' then - self:onEncodeError("can't convert " .. type(value) .. " to JSON", etc) - - else - -- - -- A table to be converted to either a JSON object or array. - -- - local T = value - - if type(options) ~= 'table' then - options = {} - end - if type(indent) ~= 'string' then - indent = "" - end - - if parents[T] then - self:onEncodeError("table " .. tostring(T) .. " is a child of itself", etc) - else - parents[T] = true - end - - local result_value - - local object_keys, maximum_number_key, map = object_or_array(self, T, etc) - if maximum_number_key then - -- - -- An array... - -- - local ITEMS = { } - for i = 1, maximum_number_key do - table.insert(ITEMS, encode_value(self, T[i], parents, etc, options, indent)) - end - - if options.pretty then - result_value = "[ " .. table.concat(ITEMS, ", ") .. " ]" - else - result_value = "[" .. table.concat(ITEMS, ",") .. "]" - end - - elseif object_keys then - -- - -- An object - -- - local TT = map or T - - if options.pretty then - - local KEYS = { } - local max_key_length = 0 - for _, key in ipairs(object_keys) do - local encoded = encode_value(self, tostring(key), parents, etc, options, indent) - if options.align_keys then - max_key_length = math.max(max_key_length, #encoded) - end - table.insert(KEYS, encoded) - end - local key_indent = indent .. tostring(options.indent or "") - local subtable_indent = key_indent .. string.rep(" ", max_key_length) .. (options.align_keys and " " or "") - local FORMAT = "%s%" .. string.format("%d", max_key_length) .. "s: %s" - - local COMBINED_PARTS = { } - for i, key in ipairs(object_keys) do - local encoded_val = encode_value(self, TT[key], parents, etc, options, subtable_indent) - table.insert(COMBINED_PARTS, string.format(FORMAT, key_indent, KEYS[i], encoded_val)) - end - result_value = "{\n" .. table.concat(COMBINED_PARTS, ",\n") .. "\n" .. indent .. "}" - - else - - local PARTS = { } - for _, key in ipairs(object_keys) do - local encoded_val = encode_value(self, TT[key], parents, etc, options, indent) - local encoded_key = encode_value(self, tostring(key), parents, etc, options, indent) - table.insert(PARTS, string.format("%s:%s", encoded_key, encoded_val)) - end - result_value = "{" .. table.concat(PARTS, ",") .. "}" - - end - else - -- - -- An empty array/object... we'll treat it as an array, though it should really be an option - -- - result_value = "[]" - end - - parents[T] = false - return result_value - end -end - - -function OBJDEF:encode(value, etc, options) - if type(self) ~= 'table' or self.__index ~= OBJDEF then - OBJDEF:onEncodeError("JSON:encode must be called in method format", etc) - end - return encode_value(self, value, {}, etc, options or nil) -end - -function OBJDEF:encode_pretty(value, etc, options) - if type(self) ~= 'table' or self.__index ~= OBJDEF then - OBJDEF:onEncodeError("JSON:encode_pretty must be called in method format", etc) - end - return encode_value(self, value, {}, etc, options or default_pretty_options) -end - -function OBJDEF.__tostring() - return "JSON encode/decode package" -end - -OBJDEF.__index = OBJDEF - -function OBJDEF:new(args) - local new = { } - - if args then - for key, val in pairs(args) do - new[key] = val - end - end - - return setmetatable(new, OBJDEF) -end - -return OBJDEF:new() - --- --- Version history: --- --- 20141223.14 The encode_pretty() routine produced fine results for small datasets, but isn't really --- appropriate for anything large, so with help from Alex Aulbach I've made the encode routines --- more flexible, and changed the default encode_pretty() to be more generally useful. --- --- Added a third 'options' argument to the encode() and encode_pretty() routines, to control --- how the encoding takes place. --- --- Updated docs to add assert() call to the loadfile() line, just as good practice so that --- if there is a problem loading JSON.lua, the appropriate error message will percolate up. --- --- 20140920.13 Put back (in a way that doesn't cause warnings about unused variables) the author string, --- so that the source of the package, and its version number, are visible in compiled copies. --- --- 20140911.12 Minor lua cleanup. --- Fixed internal reference to 'JSON.noKeyConversion' to reference 'self' instead of 'JSON'. --- (Thanks to SmugMug's David Parry for these.) --- --- 20140418.11 JSON nulls embedded within an array were being ignored, such that --- ["1",null,null,null,null,null,"seven"], --- would return --- {1,"seven"} --- It's now fixed to properly return --- {1, nil, nil, nil, nil, nil, "seven"} --- Thanks to "haddock" for catching the error. --- --- 20140116.10 The user's JSON.assert() wasn't always being used. Thanks to "blue" for the heads up. --- --- 20131118.9 Update for Lua 5.3... it seems that tostring(2/1) produces "2.0" instead of "2", --- and this caused some problems. --- --- 20131031.8 Unified the code for encode() and encode_pretty(); they had been stupidly separate, --- and had of course diverged (encode_pretty didn't get the fixes that encode got, so --- sometimes produced incorrect results; thanks to Mattie for the heads up). --- --- Handle encoding tables with non-positive numeric keys (unlikely, but possible). --- --- If a table has both numeric and string keys, or its numeric keys are inappropriate --- (such as being non-positive or infinite), the numeric keys are turned into --- string keys appropriate for a JSON object. So, as before, --- JSON:encode({ "one", "two", "three" }) --- produces the array --- ["one","two","three"] --- but now something with mixed key types like --- JSON:encode({ "one", "two", "three", SOMESTRING = "some string" })) --- instead of throwing an error produces an object: --- {"1":"one","2":"two","3":"three","SOMESTRING":"some string"} --- --- To maintain the prior throw-an-error semantics, set --- JSON.noKeyConversion = true --- --- 20131004.7 Release under a Creative Commons CC-BY license, which I should have done from day one, sorry. --- --- 20130120.6 Comment update: added a link to the specific page on my blog where this code can --- be found, so that folks who come across the code outside of my blog can find updates --- more easily. --- --- 20111207.5 Added support for the 'etc' arguments, for better error reporting. --- --- 20110731.4 More feedback from David Kolf on how to make the tests for Nan/Infinity system independent. --- --- 20110730.3 Incorporated feedback from David Kolf at http://lua-users.org/wiki/JsonModules: --- --- * When encoding lua for JSON, Sparse numeric arrays are now handled by --- spitting out full arrays, such that --- JSON:encode({"one", "two", [10] = "ten"}) --- returns --- ["one","two",null,null,null,null,null,null,null,"ten"] --- --- In 20100810.2 and earlier, only up to the first non-null value would have been retained. --- --- * When encoding lua for JSON, numeric value NaN gets spit out as null, and infinity as "1+e9999". --- Version 20100810.2 and earlier created invalid JSON in both cases. --- --- * Unicode surrogate pairs are now detected when decoding JSON. --- --- 20100810.2 added some checking to ensure that an invalid Unicode character couldn't leak in to the UTF-8 encoding --- --- 20100731.1 initial public release --- diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..800baae --- /dev/null +++ b/Makefile @@ -0,0 +1,7 @@ +run: + make clean + docker build -t img . + docker run --rm img test + +clean: + rm -rf img diff --git a/bad-params.txt b/bad-params.txt deleted file mode 100644 index 24dcbb0..0000000 --- a/bad-params.txt +++ /dev/null @@ -1,64 +0,0 @@ -utm_source -utm_medium -utm_campaign -utm_term -utm_content -utm_adgroup -ref -refsrc -referrer_id -referrerid -src -i -s -ts -feature -jsessionid -phpsessid -aspsessionid -sessionid -zenid -sid -gclid -fb_xd_fragment -fb_comment_id -fbclid -cfid -cftoken -doing_wp_cron -pk_cpn -pk_campaign -pk_kwd -pk_keyword -piwik_campaign -piwik_kwd -ga_source -ga_medium -ga_term -ga_content -ga_campaign -ga_place -yclid -_openstat -fb_action_ids -fb_action_types -fb_source -fb_ref -action_object_map -action_type_map -action_ref_map -gs_l -mkt_tok -hmb_campaign -hmb_medium -hmb_source -rand -wicket:antiCache -cachebuster -nocache -vs -dilid -script_case_session -cid -extid -_flowexecutionkey diff --git a/bad-patterns.txt b/bad-patterns.txt deleted file mode 100644 index 9427158..0000000 --- a/bad-patterns.txt +++ /dev/null @@ -1,33 +0,0 @@ -/action/consumeSharedSessionAction -/action/consumeSsoCookie -/action/getSharedSiteSession -/juris/error%.jsf -facebook%.com/login%.php -facebook%.com/cookie/ -facebook%.com/plugins/ -facebook%.com/sharer/ -facebook%.com/sharer%.php -gongquiz%.com.+&historyNo=[0-9]+ -univis%.univie%.ac%.at/ausschreibungstellensuche/ -fundraise%.cancerresearchuk%.org/signup/account/ -mma%.ft%.com -^https?://dmg%.go%-2b%-planer%.de/ -^https?://3d%.espace%-aubade%.fr/ -^https?://kuechenplaner%.[^/]+/cloud/ -^https?://3d%-salledebains%.geberit%.fr/ -^https?://bibliotekanauki%.ceon%.pl/yadda/search/general%.action -^https?://[^/]+%.icm%.edu%.pl/.*search/article%.action -^https?://interamt%.de/koop/app/ -^https?://tesiunam%.dgb%.unam%.mx/F/ -^https?://[^%.]+%.sedelectronica%.es/.*%?x= -^https?://www%.cp%-cc%.org/programs%-services/ -/ibank/_crypt_ -%%7B%%7B.+%%7D%%7D -^https?://[^/]+/" -^http://[0-9a-z][0-9a-z][0-9a-z][0-9][0-9][0-9]?%.[^%./]+%.com/$ -^http://[0-9a-z][0-9a-z][0-9a-z][0-9][0-9][0-9]?%.[^%./]+%.com/[a-z]+%.?[a-z][a-z][a-z]?$ -^http://[0-9a-z][0-9a-z][0-9a-z][0-9][0-9][0-9]?%.[^%./]+%.com/[a-z]+/[a-z]+[0-9]*%.?[a-z][a-z][a-z]?$ -^https?://[^/]*yahoo%.com/.+%%5C.+at%.atwola%.com -^https?://[^/]*at%.atwola%.com/ -^https?://www%.bafa%.de/ -%%5C%%22 diff --git a/boilerplate.py b/boilerplate.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/boilerplate.py @@ -0,0 +1 @@ + diff --git a/extract-outlinks-patterns.txt b/extract-outlinks-patterns.txt deleted file mode 100644 index bfa4645..0000000 --- a/extract-outlinks-patterns.txt +++ /dev/null @@ -1,1112 +0,0 @@ -15min.org -15minut.org -1prime.biz -24-ore.com -24.ae -7days.ae -8am.af -aamulehti.fi -abc.news.go.com -abstvradio.com -accringtonobserver.co.uk -acn.cu -ad.nl -adelante.cu -adigea.aif.ru -adsports.ae -adventure.nationalgeographic.com -af.farsnews.com -af.reuters.com -afghan-review.com -afghanews.ir -afghanislamicpress.com -afghanistannewscenter.com -afghanistansun.com -afghanistantimes.af -afghanpaper.com -aftenposten.no -aftonbladet.se -agora.co.ao -ahora.cu -ahram.org.eg -aif.ru -aiis-albania.org -airdrieecho.com -akhbarelyoum.dz -akhersaa-dz.com -al-fadjr.com -al-monitor.com -alalam.ir -alarabiya.net -albawabaeg.com -albayan.ae -albertafarmexpress.ca -alfajr-news.net -alger-info.com -algerie-focus.com -algerieconfluences.com -alhadath.net -alittihad.ae -aljazeera.com -alkhaleej.ae -alkmaarnieuws.nl -almasdarnews.com -almogaz.com -alquds.co.uk -alroeya.ae -alrugby.com -alseyassi-dz.com -alwahdanews.ae -alwatannewspaper.ae -am.radiovaticana.va -ambito.com -ameinfo.com -ana.ad -andorradifusio.ad -angliya.com -angonoticias.com -annasrdz.com -annasronline.com -antiguaobserver.com -aopnews.com -ap22.ru -apple.com -appledaily.com.tw -appleinsider.com -appleinsider.net -appleinsider.ru -aps.dz -ar.farsnews.com -ar.radiovaticana.va -ar.reuters.com -ar.timesofisrael.com -ara.ad -ara.reuters.com -arabianbusiness.com -arabic.sport360.com -ariananet.com -arnhemnieuws.nl -arstechnica.com -asia.nikkei.com -aspi.org.au -aspistrategist.org.au -astro.fashion.qq.com -aswatmasriya.com -atlantic.ctvnews.ca -ausairpower.net -auto.qq.com -autonews.ru -b.dk -baby.qq.com -bakhtarnews.com.af -balkanweb.com -banglaexpress.ae -barkinganddagenhampost.co.uk -barrie.ctvnews.ca -barrietoday.com -baytoday.ca -bbc.co.uk -bbc.com -bc.ctvnews.ca -be.radiovaticana.va -becclesandbungayjournal.co.uk -belegger.nl -benawa.com -bencarson.com -berlinergazette.de -berniesanders.com -bet.nl -beuningennieuws.nl -bexleytimes.co.uk -bg.radiovaticana.va -bgr.com -bigstory.ap.org -birminghammail.co.uk -birminghampost.co.uk -biz.tpo.nl -blackcountrybugle.co.uk -blikopnieuws.nl -blog.archive.org -blog.cleveland.com -blog.lesoir.be -blogs.canoe.com -blogs.wsj.com -blogues.canoe.ca -bloomberg.co.jp -bloomberg.com -bna.bh -bnn.ca -bnr.nl -boingboing.net -bondebladet.no -bondia.ad -book.qq.com -boxmeernieuws.nl -br.radiovaticana.va -br.reuters.com -br.wsj.com -brantfordexpositor.ca -bredajournaal.nl -breitbart.com -brisinst.org.au -bromleytimes.co.uk -bt.dk -btp-dz.com -buenosairesherald.com -burymercury.co.uk -buzzfeed.com.au -ca.reuters.com -calgary.ctvnews.ca -calgaryherald.com -calgarysun.com -cambstimes.co.uk -cameroonpostline.com -camrosecanadian.com -cankaoxiaoxi.com -capitalnewyork.com -catholicherald.co.uk -ceda.com.au -cesd.az -channel4.com -channelnewsasia.com -chathamthisweek.com -chealth.canoe.com -chesterchronicle.co.uk -china.kyodonews.jp -chinatimes.com -chosonsinbo.com -chosun.com -chrischristie.com -chron.com -chroniclelive.co.uk -chroom.tpo.nl -cis.org.au -citynews.ca -clarin.com -class.qq.com -cleveland.com -clintonnewsrecord.com -cn.ibtimes.com -cn.nytimes.com -cn.timesofisrael.com -cn.wsj.com -cnbc.com -cnea.gov.ar -cnet.com -cnews.ru -coastalscene24.co.uk -cochranetimes.com -cochranetimespost.ca -collections.unu.edu -competition.dz -computersweden.idg.se -conae.gov.ar -conicet.gov.ar -contenidos.lanacion.com.ar -coventryobserver.co.uk -coventrytelegraph.net -cp24.com -cpd.org.au -cphpost.dk -cranbrookherald.com -crewechronicle.co.uk -cronica.com.ar -cronicamendoza.com -cs.radiovaticana.va -ctvnews.ca -cubainfo.acn.cu -cubanews.acn.cu -cubasi.com -cubasi.cu -cuijknieuws.nl -cul.qq.com -cult.tpo.nl -cultofandroid.com -cultofandroid.com.feedsportal.com -cultofmac.com -cultofmac.com.feedsportal.com -cultofmac.com.ua -cybersecuritydojo.com -czechcrunch.cz -dagelijksestandaard.nl -dagen.no -dagen.se -dagens.dk -dagogtid.no -dagsavisen.no -daily-mail.co.zm -dailyafghanistan.com -dailyfinance.com -dailyheraldtribune.com -dailymail.co.uk -dailynewsegypt.com -dailynk.com -dailypost.co.uk -dailyrecord.co.uk -dailystar.co.uk -dailystar.com.lb -dajia.qq.com -dari.wadsam.com -data.gdeltproject.org -de.radiovaticana.va -de.reuters.com -delfi.lt -demokraatti.fi -demorgen.be -denboschnieuws.nl -depechedekabylie.com -derehamtimes.co.uk -destructoid.com -deutschlandradio.de -deventerjournaal.nl -devpolicy.crawford.anu.edu.au -devpolicy.org -di.se -diariandorra.ad -diariobae.com -diariopopular.com.ar -diarioshow.com -digi.tech.qq.com -dissmercury.co.uk -dn.no -dn.se -dnaindia.com -dnd.nl -docsalud.com -donaldjtrump.com -donbalon.com -donbalon.eu -dp.ru -dprktoday.com -dr.dk -draytonvalleywesternreview.com -dubaichronicle.com -dunmowbroadcast.co.uk -dutchdailynews.com -dutchinamerica.com -dutchnews.nl -dw.com -eaber.org -eadt.co.uk -eastasiaforum.org -eastlondonadvertiser.co.uk -ech-chaab.com -echoroukonline.com -economictimes.indiatimes.com -edition.cnn.com -edmonton.ctvnews.ca -edmontonjournal.com -edmontonsun.com -edsonleader.com -edu.qq.com -eg-online.ru -eindhovennieuws.nl -ekstrabladet.dk -el-hakaek.com -el-hourria.com -el-massa.com -el-youm.info -elahdath.net -elbilad.net -elciudadanoweb.com -elcolombiano.com -eldjoumhouria.dz -electronicintifada.net -elheddaf.com -elkhabar.com -elkhabarerriadhi.com -elliotlaketoday.com -elmakam.com -elmassar-ar.com -elmoudjahid.com -elperiodic.ad -elraaed.com -elsevier.nl -elwatan.com -elystandard.co.uk -emaratalyoum.com -emirates247.com -en.alalam.ir -en.aswatmasriya.com -en.farsnews.com -en.gigazine.net -en.hawarnews.com -en.novayagazeta.ru -en.radiovaticana.va -en.video.canoe.tv -engadget.com -english.ahram.org.eg -english.chosun.com -english.juventudrebelde.cu -english.kyodonews.jp -english.yonhapnews.co.kr -ennaharonline.com -ent.qq.com -entv.dz -environment.nationalgeographic.com -eo.radiovaticana.va -eqmweekly.com.af -es.hawarnews.com -es.radiovaticana.va -es.reuters.com -escambray.cu -ess.fi -etn.fi -eufin.nl -euronews.com -evatt.org.au -eveningnews24.co.uk -exame.co.ao -examiner.co.uk -exiledonline.com -exmouthherald.co.uk -exmouthjournal.co.uk -express.co.uk -expressandstar.com -expressen.se -fa.timesofisrael.com -fakenhamtimes.co.uk -farsnews.com -fashion.qq.com -fd.nl -feeds.24.com -feeds.arstechnica.com -feeds.bbci.co.uk -feeds.cnevids.com -feeds.feedburner.com -feeds.feedburner.jp -feeds.gawker.com -feeds.government.nl -feeds.huffingtonpost.com -feeds.ign.com -feeds.kauppalehti.fi -feeds.macrumors.com -feeds.mashable.com -feeds.news24.com -feeds.nytimes.com -feeds.sciencedaily.com -feeds.skynews.com -feeds.washingtonpost.com -feeds.webwereld.nl -feeds.wsjonline.com -feeds2.feedburner.com -feweek.co.uk -fi.radiovaticana.va -fightland.vice.com -finance.qq.com -#finance.yahoo.com -fiskeribladet.no -flip.channelnewsasia.com -forbes.com -fortmcmurraytoday.com -fortsaskatchewanrecord.com -forum.ad -foxnews.com -foxue.qq.com -fr.canoe.ca -fr.radiovaticana.va -fr.reuters.com -fr.timesofisrael.com -fr.video.canoe.tv -france24.com -frenchwam.com -friheten.no -frontpage.fok.nl -ft.com -ftp3.conae.gov.ar -fullfact.org -futbolete.com -games.qq.com -gamespy.dk -gazeta-pravda.ru -gazeta55.al -gazetanovgorod.ru -gazetayakutia.ru -gazettelive.co.uk -gazettetimes.com -getbucks.co.uk -gethampshire.co.uk -getreading.co.uk -getsurrey.co.uk -getwestlondon.co.uk -gfwadvertiser.ca -gigazine.net -gizmodo.com -globalnews.ca -godubai.com -gongyi.qq.com -googleblog.blogspot.com -googleblog.blogspot.nl -gov.uk -government.nl -gp.se -granma.cu -grattan.edu.au -gravesendreporter.co.uk -greatyarmouthmercury.co.uk -greenun24.co.uk -groningenjournaal.nl -guardian.ng -guccifer2.wordpress.com -guelphtoday.com -guerrillero.cu -gulfnews.com -gulftoday.ae -guruwatch.nl -gva.be -haaretz.co.il -haaretz.com -hackneygazette.co.uk -halifaxtoday.ca -hamhigh.co.uk -hamhighbroadway.co.uk -hannaherald.com -hardenbergnieuws.nl -hawarnews.com -hbl.fi -hd.stheadline.com -he.radiovaticana.va -health.qq.com -health.usnews.com -helsinkitimes.fi -heraldlive.co.za -hertsad.co.uk -heute.de -heyetnet.org -hi.radiovaticana.va -highrivertimes.com -hillaryclinton.com -hinckleytimes.net -hintonparklander.com -hk.on.cc -hln.be -horizons-dz.com -house.qq.com -hr.radiovaticana.va -hrnicholls.com.au -hs.fi -hu.radiovaticana.va -huffingtonpost.com -huntspost.co.uk -hy.radiovaticana.va -i-d.vice.com -iamexpat.nl -ib.edu.ar -ibinda.com -ibtimes.co.in -ibtimes.co.uk -ibtimes.com -ibtimes.com.au -icelandreview.com -idag.no -iex.nl -iexgeld.nl -iexprofs.nl -ilfordrecorder.co.uk -ilkka.fi -iltalehti.fi -iltasanomat.fi -in.reuters.com -independent.co.uk -indianexpress.com -infocanuelas.com -infosoir.com -infoworld.com -inta.gob.ar -intelligencer.ca -international.nytimes.com -internationalaffairs.org.au -inti.gob.ar -inti.gov.ar -invasor.cu -io-tech.fi -ipa.org.au -ipolitics.ca -ips.cap.anu.edu.au -ipswichstar.co.uk -iraq-amsi.net -irna.ir -islingtongazette.co.uk -it.ibtimes.com -it.reuters.com -itv.com -itviikko.fi -izvestia.ru -ja.radiovaticana.va -japantimes.co.jp -jeugdjournaal.nl -jeune-independant.net -jia360.com -johnkasich.com -joop.nl -jornaldeangola.sapo.ao -jornaldosdesportos.sapo.ao -jornalf8.net -journaldemontreal.com -jp.ibtimes.com -jp.reuters.com -jp.techcrunch.com -jp.vice.com -jp.wsj.com -juventudrebelde.cu -jyllands-posten.dk -kabayanweekly.com -kabulpress.org -kaleva.fi -kansalainen.fi -kansanuutiset.fi -karjalainen.fi -karjalansanomat.ru -kawalisse.com -kbctv.co.ke -kenoradailyminerandnews.com -kentnews.co.uk -kentonline.co.uk -khaama.com -khabarafghan.com -khaleejtimes.com -kid.qq.com -kids.nationalgeographic.com -kilburntimes.co.uk -kincardinenews.com -kingstonthisweek.com -kitchener.ctvnews.ca -klassekampen.no -kodima.rkperiodika.ru -kohajone.com -kommersant.ru -koreatimes.co.kr -kotaku.com -kp.ru -kr.nknews.org -kr.radiovaticana.va -kristeligt-dagblad.dk -ksml.fi -ktimes.com -ku.hawarnews.com -lacapital.com.ar -lactualite-dz.info -lakeshoreadvance.com -lanacion.com.ar -lanueva.com -lapinkansa.fi -laprensa.com.ar -lapresse.tn -larawbar.net -larazon.com.ar -lat.wsj.com -latimes.com -latribune-dz.com -lautomarche.com -lavoz.com.ar -lawandtax-news.com -leaderpost.com -lejourdalgerie.com -leloir.org.ar -lemaghrebdz.com -lematindz.net -lemauricien.com -lemidi-dz.com -lemonde.fr -leparisien.fr -lequotidien-oran.com -lesnouvellesnews.fr -lesoir.be -lesoirdalgerie.com -lestrepublicain.com -letempsdz.com -lexpressiondz.com -lfpress.com -lgz.ru -liberte-algerie.com -libyaherald.com -lifehacker.com -live.huffingtonpost.com -liveleak.com -liverpoolecho.co.uk -lnr-dz.com -london.ctvnews.ca -looopings.nl -losandes.com.ar -loughboroughecho.net -lowestoftjournal.co.uk -lowyinstitute.org -lrt.lt -lt.radiovaticana.va -lta.reuters.com -ltn.com.tw -lv.radiovaticana.va -maaseuduntulevaisuus.fi -macclesfield-express.co.uk -mackungfu.org -macleans.ca -macrumors.com -macrumors.ro -madagascar-tribune.com -madamasr.com -mailonsunday.co.uk -managementherald.com.ar -manchestereveningnews.co.uk -mandegardaily.com -mannkal.org -mannwest.com -marcorubio.com -marketwatch.com -marmai.fi -marsad.ly -mashable.com -mashable.pw -mayerthorpefreelancer.com -media.tpo.nl -menziesrc.org -meridianbooster.com -mes.ad -metro.co.uk -metro.fi -metro.se -metrohk.com.hk -metronews.ca -metronieuws.nl -mg.co.za -middleeasteye.net -midnorthmonitor.com -midweekherald.co.uk -mikrobitti.fi -mil.qq.com -mingpao.com -mirror.co.uk -mk.radiovaticana.va -mk.ru -mkset.ru -ml.radiovaticana.va -mn.ru -mobilefeeds.wsj.com -moheet.com -money.rbc.ru -money.usnews.com -monitor.co.ug -montreal.ctvnews.ca -montrealgazette.com -morgenbladet.no -morningstaronline.co.uk -mospravda.ru -motherboard.vice.com -motors-dz.com -mountain-news.com -msnbc.com -munchies.vice.com -mundod.lavoz.com.ar -mx.dk -mx.reuters.com -naenara.com.kp -nanaimodailynews.com -nantonnews.com -nasdaq.com -nation.co.ke -nationalobserver.com -nationalpost.com -nationen.no -navbharattimes.indiatimes.com -nbcnews.com -nd.nl -nederlandnieuws.nl -nerjanieuws.nl -newburytoday.co.uk -newhamrecorder.co.uk -newizv.ru -newlookmedia.ru -news.com.au -news.cubasi.cu -news.ltn.com.tw -news.mingpao.com -news.nationalgeographic.com -news.nationalpost.com -news.qq.com -news.sky.com -news.tbs.co.jp -news.vice.com -news.vip-urlaub.de -news.yahoo.com -news24.com -newscentralasia.net -newsletter.co.uk -newsmonkey.be -newsrss.bbc.co.uk -newtimes.co.rw -newvision.co.ug -ng.ru -niagarafallsreview.ca -nieuws.tpo.nl -nijmegennieuws.nl -nikkei.com -nisnews.nl -nknews.org -nltimes.nl -noisey.vice.com -north-africa.com -northdevongazette.co.uk -northernontario.ctvnews.ca -northnorfolknews.co.uk -northsomersettimes.co.uk -norwichadvertiser24.co.uk -norwichgazette.com -nos.nl -notinet.icrt.cu -novayagazeta.ru -novojornal.co.ao -novosti.acn.cu -npr.org -nrc.nl -nrk.no -nsl-basketball.sport360.com -nsl-football.sport360.com -nsl.sport360.com -nu.nl -nugget.ca -nunatsiaqonline.ca -nycity.today -nyheder.tv2.dk -nypost.com -nytid.no -nytimes.com -nzherald.co.nz -o.canada.com -og.ru -ohio.com -one.iex.nl -onionstudios.com -opais.co.ao -orientaldaily.on.cc -osservatoreromano.va -ossnieuws.nl -ottawa.ctvnews.ca -ottawacitizen.com -ottawasun.com -ouarsenis.com -ouest-france.fr -ouestribune-dz.com -ourworld.unu.edu -outlookafghanistan.net -owensoundsuntimes.com -oxfordtimes.co.uk -pagina12.com.ar -pajhwok.com -panorama-sport.com -panorama.com.al -parool.nl -participaties.nl -pdc.tv -percapita.org.au -periodico26.cu -photography.nationalgeographic.com -pinchercreekecho.com -pl.radiovaticana.va -pm.gc.ca -pnp.ru -politico.com -politico.eu -politiek.tpo.nl -politifact.com -polygon.com -portalangop.co.ao -portfolio.lesoir.be -postzambia.com -powned.tv -pqbnews.com -pressandjournal.co.uk -presstv.ir -prnewsonline.com -prosper.org.au -province.ru -prrecordgazette.com -pt.radiovaticana.va -qq.com -quote.rbc.ru -quotidien-oran.com -radio.nrk.no -radioalgerie.dz -radiolome.tg -randpaul.com -raqqa-sl.com -rawstory.com -rbc.ru -rbth.com -readwrite.com -recorder.ca -redstar.ru -refdag.nl -regina.ctvnews.ca -regio.tpo.nl -republicoftogo.com -reuters.com -rg.ru -ria.ru -rionegro.com.ar -ro.radiovaticana.va -rodong.rep.kp -romfordrecorder.co.uk -rossendalefreepress.co.uk -royston-crow.co.uk -rss.canada.com -rss.canoe.com -rss.cnn.com -rss.dw.com -rss.feedsportal.com -rss.nytimes.com -rss.upi.com -rt.com -rtl7darts.nl -rtlnieuws.nl -ru.hawarnews.com -ru.radiovaticana.va -ru.reuters.com -rumbosdigital.com -ruokala.net -ruscur.ru -sabawoon.com -sackvilletribunepost.com -saffronwaldenreporter.co.uk -sam.az -sammobile.com -sargasso.nl -saskatoon.ctvnews.ca -satakunnankansa.fi -saultstar.com -savonsanomat.fi -sawt-alahrar.net -sci-news.com -sciencedaily.com -sciencenews.org -scotlandnow.dailyrecord.co.uk -semanarioeconomico.co.ao -sfgate.com -sidmouthherald.co.uk -siliconprairienews.com -simcoereformer.ca -sk.radiovaticana.va -sl.radiovaticana.va -sobesednik.ru -sootoday.com -sot.com.al -southportvisiter.co.uk -sovsakh.ru -sovsport.ru -spbvedomosti.ru -spiegel.de -sport-express.ru -sport.rbc.ru -sport360.com -sports.qq.com -sports.vice.com -#sports.yahoo.com -sq.radiovaticana.va -standaard.be -standard-freeholder.com -standard.co.uk -static.feed.rbc.ru -stcatharinesstandard.ca -std.stheadline.com -stheadline.com -stock.qq.com -stowmarketmercury.co.uk -stratfordbeaconherald.com -strathmorestandard.com -stthomastimesjournal.com -student.societyforscience.org -sudburymercury.co.uk -sunnewsonline.com -suomenmaa.fi -suomenuutiset.fi -super.ae -sustg.com -sv.radiovaticana.va -svd.se -svenska.yle.fi -svt.se -sw.radiovaticana.va -ta.radiovaticana.va -taand.com -tagesschau.de -tai.org.au -taipeitimes.com -talk.tpo.nl -taloussanomat.fi -tchina.kyodonews.jp -tech.qq.com -techcrunch.asia -techcrunch.cn -techcrunch.com -techradar.me -tedcruz.org -tehrantimes.com -tekniikanmaailma.fi -telegraaf.nl -telegraph.co.uk -thanhnien.vn -the-japan-news.com -theantiguan.com -thearabianpost.com -theatlantic.com -theautonet.com -thebeaverton.com -thechronicleherald.ca -thecomet.net -thecragandcanyon.ca -thecreatorsproject.vice.com -thedailyobserver.ca -thedailystar.net -theglobeandmail.com -theguardian.com -thehindu.com -theindependent.co.zw -theintercept.com -thelocal.fr -themoscowtimes.com -thenational.ae -thenationalstudent.com -thenextweb.com -theonion.com -thepeterboroughexaminer.com -theprovince.com -theregister.co.uk -therwandan.com -thestage.co.uk -thestandard.com.hk -thestar.com -thestarphoenix.com -thesudburystar.com -thesun.co.uk -thesydneyinstitute.com.au -thetfordandbrandontimes.co.uk -thetimes.co.uk -theverge.com -theweathernetwork.com -thewestonmercury.co.uk -thewhig.com -thisdaylive.com -thump.vice.com -ti.radiovaticana.va -tielnieuws.nl -tilburgnieuws.nl -time.com -times.co.zm -timescolonist.com -timesofindia.indiatimes.com -timesofisrael.com -timminspress.com -timminstoday.com -tivi.fi -tmz.com -today.ng -todayszaman.com -togozine.com -tolafghan.com -tomshardware.com -toronto.ctvnews.ca -torontosun.com -torrentfreak.com -tpo.nl -tr.farsnews.com -tr.hawarnews.com -trabajadores.cu -transactiondalgerie.com -travel.nationalgeographic.com -travel.usnews.com -tribune.com.pk -trouw.nl -trud.ru -ts.fi -tumentoday.ru -tuoitrenews.vn -tv.echoroukonline.com -tv.rbc.ru -tverlife.ru -tvt.tg -tweakers.net -twenterandnieuws.nl -uaeinteract.com -udennieuws.nl -udn.com -uk.radiovaticana.va -uk.reuters.com -unu.edu -upi.com -uriminzokkiri.com -usatoday.com -usnews.com -ussc.edu.au -utrechtjournaal.nl -uusisuomi.fi -v.qq.com -vancouverisland.ctvnews.ca -vancouverobserver.com -vanguardia.cu -vanguardngr.com -vaterland.li -vechorka.ru -vedomosti.ru -veghelnieuws.nl -veintitres.com.ar -vendingtimes.com -verkkouutiset.fi -vi.radiovaticana.va -vice.cn -vice.com -viceland.com -video.asia.nikkei.com -video.cnbc.com -video.nationalgeographic.com -video.usnews.com -video.vice.com -video.wired.com -videos.leparisien.fr -vihrealanka.fi -vl.no -vmnews.ru -vn.ru -volkskrant.nl -vos.lavoz.com.ar -vremya.ru -vulcanadvocate.com -wadsam.com -wakteldjazair.com -walesonline.co.uk -wam.ae -washingtonpost.com -wattonandswaffhamtimes.co.uk -waveneyadvertiser24.co.uk -web.kbcalgerie.tv -webwereld.nl -wharf.co.uk -whitecourtstar.com -whtimes.co.uk -wijchennieuws.nl -wikileaks.org -windsor.ctvnews.ca -windsorstar.com -winnipeg.ctvnews.ca -winnipegsun.com -wired.com -wisbechstandard.co.uk -woodstocksentinelreview.com -wsj.com -www3.nhk.or.jp -wymondhamandattleboroughmercury.co.uk -xinhuanet.com -yarmouthadvertiser24.co.uk -yemen-nn.com -yenisafak.com -yle.fi -ynet.co.il -ynetnews.com -yomiuri.co.jp -yonhapnews.co.kr -yonhapnews.feedsportal.com -ypgrojava.com -zaman.com.tr -zamanarabic.com -zamanfrance.fr -zamankurdi.com -zh.radiovaticana.va -zwollenieuws.nl - diff --git a/gmd.lua b/gmd.lua new file mode 100644 index 0000000..87641a5 --- /dev/null +++ b/gmd.lua @@ -0,0 +1,122 @@ +strin = "2~NzUwMCBzdGFycyBjOg==~4~3~9~1 month~6~1803945|2~SSBiZWF0IDYgaW5zYW5lIGRlbW9ucyBpbiAyNCBob3VycyBsbWFvOiBOZWNyb3BvbGlzLCBUaGUgQ2F2ZXJucyBJSSwgRWxlbWVudHMgWCwgWCBBZHZlbnR1cmUsIFNhZGlzbSwgYW5kIEJsYXN0ZXIgYzo=~4~21~9~8 months~6~1793260|2~L1wvXC9cIDwz~4~6~9~1 year~6~1785414|2~U2VudCBmcm9tIGlPUyBTaG9ydGN1dHMh~4~8~9~1 year~6~1776426|2~VGhpcyBjb21tZW50IHdhcyB1cGxvYWRlZCBmb3IgdGhlIEdEIERvY3Mh~4~5~9~1 year~6~1772719|2~VGhlIHRyaWxvZ3kgaGFzIGJlZW4gY29tcGxldGVkLi4uR0cgQWZ0ZXJtYXRoIQ==~4~8~9~1 year~6~1766450|2~Im93byIgLSBGb3VuZG15YmFsbA==~4~4~9~1 year~6~1766338|2~NTAwMCBzdGFycyE=~4~12~9~2 years~6~1756926|2~Qmxvb2RiYXRoIEdHISEh~4~24~9~2 years~6~1745624|2~QWxsZWdpYW5jZSAxMDAl~4~3~9~2 years~6~1744292#73:0:10" + +-- https://stackoverflow.com/questions/40149617/split-string-with-specified-delimiter-in-lua +function split(s, sep) + local fields = {} + + local sep = sep or " " + local pattern = string.format("([^%s]+)", sep) + string.gsub(s, pattern, function(c) fields[#fields + 1] = c end) + + return fields +end +-- https://stackoverflow.com/questions/40149617/split-string-with-specified-delimiter-in-lua +-- +GMD = {} +GMD["comments"] = {} + +function table.show(t, name, indent) + local cart -- a container + local autoref -- for self references + + --[[ counts the number of elements in a table + local function tablecount(t) + local n = 0 + for _, _ in pairs(t) do n = n+1 end + return n + end + ]] + -- (RiciLake) returns true if the table is empty + local function isemptytable(t) return next(t) == nil end + + local function basicSerialize (o) + local so = tostring(o) + if type(o) == "function" then + local info = debug.getinfo(o, "S") + -- info.name is nil because o is not a calling level + if info.what == "C" then + return string.format("%q", so .. ", C function") + else + -- the information is defined through lines + return string.format("%q", so .. ", defined in (" .. + info.linedefined .. "-" .. info.lastlinedefined .. + ")" .. info.source) + end + elseif type(o) == "number" or type(o) == "boolean" then + return so + else + return string.format("%q", so) + end + end + + local function addtocart (value, name, indent, saved, field) + indent = indent or "" + saved = saved or {} + field = field or name + + cart = cart .. indent .. field + + if type(value) ~= "table" then + cart = cart .. " = " .. basicSerialize(value) .. ";\n" + else + if saved[value] then + cart = cart .. " = {}; -- " .. saved[value] + .. " (self reference)\n" + autoref = autoref .. name .. " = " .. saved[value] .. ";\n" + else + saved[value] = name + --if tablecount(value) == 0 then + if isemptytable(value) then + cart = cart .. " = {};\n" + else + cart = cart .. " = {\n" + for k, v in pairs(value) do + k = basicSerialize(k) + local fname = string.format("%s[%s]", name, k) + field = string.format("[%s]", k) + -- three spaces between levels + addtocart(v, fname, indent .. " ", saved, field) + end + cart = cart .. indent .. "};\n" + end + end + end + end + + name = name or "__unnamed__" + if type(t) ~= "table" then + return name .. " = " .. basicSerialize(t) + end + cart, autoref = "", "" + addtocart(t, name, indent) + return cart .. autoref +end + +GMD["comments"]["parse"] = function(comment) + local splitted = split(comment, ":") + local retern = {} + retern.comment = splitted[1] + retern.account = splitted[2] + + retern.parsed = {} + retern.parsed.comment = {} + local data = split(retern.comment, "|") + for i=1, #data do + retern.parsed.comment[i] = {} + -- comment parser + local ndata = split(data[i], "~") + for j=1, #ndata do + if not (j % 2 == 0) then -- key + key = ndata[j] + else -- value + local value = ndata[j] + retern.parsed.comment[i][key] = value + end + end + end + -- print("DONE") + -- print(table.show(retern.parsed.comment)) + + return retern +end +print(GMD["comments"]["parse"](strin)) diff --git a/grab.lua b/grab.lua new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/grab.lua @@ -0,0 +1 @@ + diff --git a/ignore-patterns.txt b/ignore-patterns.txt deleted file mode 100644 index 12b7039..0000000 --- a/ignore-patterns.txt +++ /dev/null @@ -1,21 +0,0 @@ -[%?&]ver=[0-9a-zA-Z%.]*%.16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9] -[%?&]ver=16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9] -[%?&]t=16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$ -[%?&]t=16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]%.[0-9]+$ -[%?&]hash=16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$ -%?16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$ -%?16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$ -%?6[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$ -%?v=[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$ -;extid=[0-9a-f]+$ -[%?&;]_flowexecutionkey= -[%?&;]sid= -[%?&;]cid= -[%?&;]jsessionid= -[%?&;]script_case_session= -[%?&;]Dilid= -[%?&;][pP][hH][pP][sS][eE][sS][sS][iI][dD]= -[%?&;]wtd= -[%?&;]nonce= -[%?&;]rnd= -^https?://[^/]+/index%.php%?s= diff --git a/page-requisite-patterns.txt b/page-requisite-patterns.txt deleted file mode 100644 index f519342..0000000 --- a/page-requisite-patterns.txt +++ /dev/null @@ -1,17 +0,0 @@ -%.apng -%.avif -%.gif -%.jpe?g -%.jfif -%.pjpeg -%.pjp -%.png -%.svg -%.webp -%.bmp -%.ico -%.cur -%.tif -%.tiff -%.js -%.css diff --git a/pipeline.py b/pipeline.py index d05b1bb..8dd0208 100644 --- a/pipeline.py +++ b/pipeline.py @@ -1,50 +1,41 @@ -# encoding=utf8 -import datetime -from distutils.version import StrictVersion -import hashlib -import json -import os -import random -import shutil -import socket -import subprocess -import sys -import threading -import time -import string -import sys +################### +###GEOMETRY DASH### +###GRAB SCRIPTS#### +################### -if sys.version_info[0] < 3: - from urllib import unquote - from urlparser import parse_qs -else: - from urllib.parse import unquote, parse_qs +# Based heavily off of ArchiveTeam/urls-grab -import requests import seesaw -from seesaw.config import realize, NumberConfigValue +from seesaw.project import * +from seesaw.tracker import * +from seesaw.util import * +from seesaw.pipeline import Pipeline from seesaw.externalprocess import WgetDownload from seesaw.item import ItemInterpolation, ItemValue -from seesaw.pipeline import Pipeline -from seesaw.project import Project from seesaw.task import SimpleTask, LimitConcurrent -from seesaw.tracker import GetItemFromTracker, PrepareStatsForTracker, \ - UploadWithTracker, SendDoneToTracker -from seesaw.util import find_executable -import zstandard -if StrictVersion(seesaw.__version__) < StrictVersion('0.8.5'): - raise Exception('This pipeline needs seesaw version 0.8.5 or higher.') - -LOCK = threading.Lock() +import hashlib +import shutil +import socket +import sys +project = Project( + title = "Geometry Dash", + project_html = """ +

Geometry Dash

+

Time to archive Geometry Dash?

+ """, +) ########################################################################### -# Find a useful Wget+Lua executable. +# The version number of this pipeline definition. # -# WGET_AT will be set to the first path that -# 1. does not crash with --version, and -# 2. prints the required version string +# Update this each time you make a non-cosmetic change. +# It will be added to the WARC files and reported to the tracker. +VERSION = '20220428.01' +#USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36' +TRACKER_ID = 'geometrytrash' +TRACKER_HOST = '172.17.0.1:8501' WGET_AT = find_executable( 'Wget+AT', @@ -60,25 +51,6 @@ WGET_AT = find_executable( if not WGET_AT: raise Exception('No usable Wget+At found.') - -########################################################################### -# The version number of this pipeline definition. -# -# Update this each time you make a non-cosmetic change. -# It will be added to the WARC files and reported to the tracker. -VERSION = '20220423.01' -#USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36' -TRACKER_ID = 'urls' -TRACKER_HOST = 'legacy-api.arpa.li' -MULTI_ITEM_SIZE = 40 -MAX_DUPES_LIST_SIZE = 10000 - -########################################################################### -# This section defines project-specific tasks. -# -# Simple tasks (tasks that do not need any concurrency) are based on the -# SimpleTask class and have a process(item) method that is called for -# each item. class CheckIP(SimpleTask): def __init__(self): SimpleTask.__init__(self, 'CheckIP') @@ -112,16 +84,6 @@ class CheckIP(SimpleTask): self._counter -= 1 -class CheckRequirements(SimpleTask): - def __init__(self): - SimpleTask.__init__(self, 'CheckRequirements') - self._checked = False - - def process(self, item): - if not self._checked: - assert shutil.which('pdftohtml') is not None - self._checked = True - class PrepareDirectories(SimpleTask): def __init__(self, warc_prefix): @@ -146,77 +108,8 @@ class PrepareDirectories(SimpleTask): time.strftime('%Y%m%d-%H%M%S') ]) - if not os.path.isfile('duplicate-urls.txt'): - open('duplicate-urls.txt', 'w').close() - - open('%(item_dir)s/%(warc_file_base)s.warc.zst' % item, 'w').close() - open('%(item_dir)s/%(warc_file_base)s_bad-urls.txt' % item, 'w').close() - open('%(item_dir)s/%(warc_file_base)s_duplicate-urls.txt' % item, 'w').close() - - -class MoveFiles(SimpleTask): - def __init__(self): - SimpleTask.__init__(self, 'MoveFiles') - - def process(self, item): - os.rename('%(item_dir)s/%(warc_file_base)s.warc.zst' % item, - '%(data_dir)s/%(warc_file_base)s.%(dict_project)s.%(dict_id)s.warc.zst' % item) - - shutil.rmtree('%(item_dir)s' % item) - - -class SetBadUrls(SimpleTask): - def __init__(self): - SimpleTask.__init__(self, 'SetBadUrls') - - def unquote_url(self, url): - temp = unquote(url) - while url != temp: - url = temp - temp = unquote(url) - return url - - def process(self, item): - item['item_name_original'] = item['item_name'] - items = item['item_name'].split('\0') - items_lower = [self.unquote_url(url).strip().lower() for url in item['item_urls']] - with open('%(item_dir)s/%(warc_file_base)s_bad-urls.txt' % item, 'r') as f: - for url in { - self.unquote_url(url).strip().lower() for url in f - }: - index = items_lower.index(url) - items.pop(index) - items_lower.pop(index) - item['item_name'] = '\0'.join(items) - - -class SetDuplicateUrls(SimpleTask): - def __init__(self): - SimpleTask.__init__(self, 'SetNewDuplicates') - - def process(self, item): - with LOCK: - self._process(item) - - def _process(self, item): - with open('duplicate-urls.txt', 'r') as f: - duplicates = {s.strip() for s in f} - with open('%(item_dir)s/%(warc_file_base)s_duplicate-urls.txt' % item, 'r') as f: - for url in f: - duplicates.add(url.strip()) - with open('duplicate-urls.txt', 'w') as f: - # choose randomly, to cycle periodically popular URLs - duplicates = list(duplicates) - random.shuffle(duplicates) - f.write('\n'.join(duplicates[:MAX_DUPES_LIST_SIZE])) - - -class MaybeSendDoneToTracker(SendDoneToTracker): - def enqueue(self, item): - if len(item['item_name']) == 0: - return self.complete_item(item) - return super(MaybeSendDoneToTracker, self).enqueue(item) - + open('%(item_dir)s/%(warc_file_base)s.warc.gz' % item, 'w').close() + open('%(item_dir)s/%(warc_file_base)s_retry-urls.txt' % item, 'w').close() def get_hash(filename): with open(filename, 'rb') as in_file: @@ -224,104 +117,65 @@ def get_hash(filename): CWD = os.getcwd() PIPELINE_SHA1 = get_hash(os.path.join(CWD, 'pipeline.py')) -LUA_SHA1 = get_hash(os.path.join(CWD, 'urls.lua')) +LUA_SHA1 = get_hash(os.path.join(CWD, 'grab.lua')) +GMD_LUA_SHA1 = get_hash(os.path.join(CWD, 'gmd.lua')) def stats_id_function(item): d = { 'pipeline_hash': PIPELINE_SHA1, 'lua_hash': LUA_SHA1, + 'gmd_lua_hash': GMD_LUA_SHA1, 'python_version': sys.version, } return d +class MoveFiles(SimpleTask): + def __init__(self): + SimpleTask.__init__(self, 'MoveFiles') -class ZstdDict(object): - created = 0 - data = None + def process(self, item): + os.rename('%(item_dir)s/%(warc_file_base)s.warc.gz' % item, + '%(data_dir)s/%(warc_file_base)s.warc.gz' % item) - @classmethod - def get_dict(cls): - if cls.data is not None and time.time() - cls.created < 1800: - return cls.data - response = requests.get( - 'https://legacy-api.arpa.li/dictionary', - params={ - 'project': TRACKER_ID - } - ) - response.raise_for_status() - response = response.json() - if cls.data is not None and response['id'] == cls.data['id']: - cls.created = time.time() - return cls.data - print('Downloading latest dictionary.') - response_dict = requests.get(response['url']) - response_dict.raise_for_status() - raw_data = response_dict.content - if hashlib.sha256(raw_data).hexdigest() != response['sha256']: - raise ValueError('Hash of downloaded dictionary does not match.') - if raw_data[:4] == b'\x28\xB5\x2F\xFD': - raw_data = zstandard.ZstdDecompressor().decompress(raw_data) - cls.data = { - 'id': response['id'], - 'dict': raw_data - } - cls.created = time.time() - return cls.data + shutil.rmtree('%(item_dir)s' % item) +class AwfulBackfeed(SimpleTask): + def __init__(self): + SimpleTask.__init__(self, 'AwfulBackfeed') + + def process(self, item): + with open('%(item_dir)s/new_items' % item) as file: + new_items = file.read() class WgetArgs(object): def realize(self, item): - with open('user-agents.txt', 'r') as f: - USER_AGENT = random.choice(list(f)).strip() wget_args = [ 'timeout', '1000', WGET_AT, - '-U', USER_AGENT, '-v', '--content-on-error', - '--lua-script', 'urls.lua', + '--lua-script', 'grab.lua', '-o', ItemInterpolation('%(item_dir)s/wget.log'), #'--no-check-certificate', '--output-document', ItemInterpolation('%(item_dir)s/wget.tmp'), '--truncate-output', '-e', 'robots=off', '--rotate-dns', - '--recursive', '--level=inf', - '--no-parent', '--timeout', '10', - '--tries', '2', + '--tries', '10', '--span-hosts', - '--page-requisites', - '--waitretry', '0', + '--waitretry', '5000', '--warc-file', ItemInterpolation('%(item_dir)s/%(warc_file_base)s'), - '--warc-header', 'operator: Archive Team', + '--warc-header', 'operator: TheTechRobo ', '--warc-header', 'x-wget-at-project-version: ' + VERSION, '--warc-header', 'x-wget-at-project-name: ' + TRACKER_ID, '--warc-dedup-url-agnostic', - '--warc-compression-use-zstd', - '--warc-zstd-dict-no-include', + '--header', 'Contact: Discord TheTechRobo#7420', '--header', 'Connection: keep-alive', '--header', 'Accept-Language: en-US;q=0.9, en;q=0.8' ] - dict_data = ZstdDict.get_dict() - with open(os.path.join(item['item_dir'], 'zstdict'), 'wb') as f: - f.write(dict_data['dict']) - item['dict_id'] = dict_data['id'] - item['dict_project'] = TRACKER_ID - wget_args.extend([ - '--warc-zstd-dict', ItemInterpolation('%(item_dir)s/zstdict'), - ]) - - item['item_name'] = '\0'.join([ - item_name for item_name in item['item_name'].split('\0') - if (item_name.startswith('custom:') and '&url=' in item_name) \ - or item_name.startswith('http://') \ - or item_name.startswith('https://') \ - ]) - item['item_name_newline'] = item['item_name'].replace('\0', '\n') item_urls = [] custom_items = {} @@ -329,17 +183,8 @@ class WgetArgs(object): for item_name in item['item_name'].split('\0'): wget_args.extend(['--warc-header', 'x-wget-at-project-item-name: '+item_name]) wget_args.append('item-name://'+item_name) - if item_name.startswith('custom:'): - data = parse_qs(item_name.split(':', 1)[1]) - for k, v in data.items(): - if len(v) == 1: - data[k] = v[0] - url = data['url'] - custom_items[url.lower()] = data - else: - url = item_name - item_urls.append(url) - wget_args.append(url) + item_urls.append(item_name) + wget_args.append(item_name) item['item_urls'] = item_urls item['custom_items'] = json.dumps(custom_items) @@ -353,73 +198,36 @@ class WgetArgs(object): return realize(wget_args, item) -########################################################################### -# Initialize the project. -# -# This will be shown in the warrior management panel. The logo should not -# be too big. The deadline is optional. -project = Project( - title = 'URLs', - project_html = ''' - -

Archiving sets of discovered outlinks. · Leaderboard

- ''' -) - pipeline = Pipeline( - CheckIP(), - CheckRequirements(), - GetItemFromTracker('https://{}/{}/multi={}/' - .format(TRACKER_HOST, TRACKER_ID, MULTI_ITEM_SIZE), - downloader, VERSION), - PrepareDirectories(warc_prefix='urls'), - WgetDownload( - WgetArgs(), - max_tries=1, - accept_on_exit_code=[0, 4, 8], - env={ - 'item_dir': ItemValue('item_dir'), - 'item_name': ItemValue('item_name_newline'), - 'custom_items': ItemValue('custom_items'), - 'warc_file_base': ItemValue('warc_file_base') - } - ), - SetBadUrls(), - SetDuplicateUrls(), - PrepareStatsForTracker( - defaults={'downloader': downloader, 'version': VERSION}, - file_groups={ - 'data': [ - ItemInterpolation('%(item_dir)s/%(warc_file_base)s.warc.zst') - ] - }, - id_function=stats_id_function, - ), - MoveFiles(), - LimitConcurrent(NumberConfigValue(min=1, max=20, default='2', - name='shared:rsync_threads', title='Rsync threads', - description='The maximum number of concurrent uploads.'), - UploadWithTracker( - 'https://%s/%s' % (TRACKER_HOST, TRACKER_ID), - downloader=downloader, - version=VERSION, - files=[ - ItemInterpolation('%(data_dir)s/%(warc_file_base)s.%(dict_project)s.%(dict_id)s.warc.zst') - ], - rsync_target_source_path=ItemInterpolation('%(data_dir)s/'), - rsync_extra_args=[ - '--recursive', - '--partial', - '--partial-dir', '.rsync-tmp', - '--min-size', '1', - '--no-compress', - '--compress-level', '0' - ] + CheckIP(), + GetItemFromTracker('http://{}/{}' + .format(TRACKER_HOST, TRACKER_ID), + downloader, VERSION), + PrepareDirectories(warc_prefix='gmd'), + WgetDownload( + WgetArgs(), + max_tries=1, + accept_on_exit_code=[0, 4, 8], + env={ + 'item_dir': ItemValue('item_dir'), + 'item_name': ItemValue('item_name_newline'), + 'custom_items': ItemValue('custom_items'), + 'warc_file_base': ItemValue('warc_file_base') + } ), - ), - MaybeSendDoneToTracker( - tracker_url='https://%s/%s' % (TRACKER_HOST, TRACKER_ID), - stats=ItemValue('stats') - ) -) - + CheckLandslide(), + PrepareStatsForTracker( + defaults={'downloader': downloader, 'version': VERSION}, + file_groups={ + 'data': [ + ItemInterpolation('%(item_dir)s/%(warc_file_base)s.warc.gz') + ] + }, + id_function=stats_id_function, + ), + MoveFiles(), + SendDoneToTracker( + tracker_url='http://%s/%s' % (TRACKER_HOST, TRACKER_ID), + stats=ItemValue('stats') + ) + ) diff --git a/urls.lua b/urls.lua deleted file mode 100644 index e95b1fc..0000000 --- a/urls.lua +++ /dev/null @@ -1,942 +0,0 @@ -local urlparse = require("socket.url") -local http = require("socket.http") -JSON = (loadfile "JSON.lua")() - -local item_dir = os.getenv("item_dir") -local item_name = os.getenv("item_name") -local custom_items = os.getenv("custom_items") -local warc_file_base = os.getenv("warc_file_base") - -local url_count = 0 -local downloaded = {} -local abortgrab = false -local exit_url = false -local min_dedup_mb = 5 - -local timestamp = nil - -if urlparse == nil or http == nil then - io.stdout:write("socket not corrently installed.\n") - io.stdout:flush() - abortgrab = true -end - -local urls = {} -for url in string.gmatch(item_name, "([^\n]+)") do - urls[string.lower(url)] = true -end - -local urls_settings = JSON:decode(custom_items) -for k, _ in pairs(urls_settings) do - urls[string.lower(k)] = true -end - -local status_code = nil - -local redirect_urls = {} -local visited_urls = {} -local ids_to_ignore = {} -for _, lengths in pairs({{8, 4, 4, 4, 12}, {8, 4, 4, 12}}) do - local uuid = "" - for _, i in pairs(lengths) do - for j=1,i do - uuid = uuid .. "[0-9a-fA-F]" - end - if i ~= 12 then - uuid = uuid .. "%-" - end - end - ids_to_ignore[uuid] = true -end -local to_ignore = "" -for i=1,9 do - to_ignore = to_ignore .. "[0-9]" -end -ids_to_ignore["%?" .. to_ignore .. "$"] = true -ids_to_ignore["%?" .. to_ignore .. "[0-9]$"] = true -ids_to_ignore[to_ignore .. "[0-9]%.[0-9][0-9][0-9][0-9]$"] = true -to_ignore = "" -for i=1,50 do - to_ignore = to_ignore .. "[0-9a-zA-Z]" -end -ids_to_ignore[to_ignore .. "%-[0-9][0-9][0-9][0-9][0-9]"] = true -ids_to_ignore["[0-9a-zA-Z%-_]!%-?[0-9]"] = true -to_ignore = "" -for i=1,32 do - to_ignore = to_ignore .. "[0-9a-fA-F]" -end -ids_to_ignore["[^0-9a-fA-F]" .. to_ignore .. "[^0-9a-fA-F]"] = true -ids_to_ignore["[^0-9a-fA-F]" .. to_ignore .. "$"] = true - -local current_url = nil -local current_settings = nil -local bad_urls = {} -local queued_urls = {} -local bad_params = {} -local bad_patterns = {} -local ignore_patterns = {} -local page_requisite_patterns = {} -local duplicate_urls = {} -local extract_outlinks_patterns = {} -local item_first_url = nil -local redirect_domains = {} -local checked_domains = {} - -local parenturl_uuid = nil -local parenturl_requisite = nil - -local dupes_file = io.open("duplicate-urls.txt", "r") -for url in dupes_file:lines() do - duplicate_urls[url] = true -end -dupes_file:close() - -local bad_params_file = io.open("bad-params.txt", "r") -for param in bad_params_file:lines() do - local param = string.gsub( - param, "([a-zA-Z])", - function(c) - return "[" .. string.lower(c) .. string.upper(c) .. "]" - end - ) - table.insert(bad_params, param) -end -bad_params_file:close() - -local bad_patterns_file = io.open("bad-patterns.txt", "r") -for pattern in bad_patterns_file:lines() do - table.insert(bad_patterns, pattern) -end -bad_patterns_file:close() - -local ignore_patterns_file = io.open("ignore-patterns.txt", "r") -for pattern in ignore_patterns_file:lines() do - table.insert(ignore_patterns, pattern) -end -ignore_patterns_file:close() - -local page_requisite_patterns_file = io.open("page-requisite-patterns.txt", "r") -for pattern in page_requisite_patterns_file:lines() do - table.insert(page_requisite_patterns, pattern) -end -page_requisite_patterns_file:close() - -local extract_outlinks_patterns_file = io.open("extract-outlinks-patterns.txt", "r") -for pattern in extract_outlinks_patterns_file:lines() do - extract_outlinks_patterns[pattern] = true -end -extract_outlinks_patterns_file:close() - -read_file = function(file, bytes) - if not bytes then - bytes = "*all" - end - if file then - local f = assert(io.open(file)) - local data = f:read(bytes) - f:close() - if not data then - data = "" - end - return data - else - return "" - end -end - -table_length = function(t) - local count = 0 - for _ in pairs(t) do - count = count + 1 - end - return count -end - -check_domain_outlinks = function(url, target) - local parent = string.match(url, "^https?://([^/]+)") - while parent do - if (not target and extract_outlinks_patterns[parent]) - or (target and parent == target) then - return parent - end - parent = string.match(parent, "^[^%.]+%.(.+)$") - end - return false -end - -bad_code = function(status_code) - return status_code ~= 200 - and status_code ~= 301 - and status_code ~= 302 - and status_code ~= 303 - and status_code ~= 307 - and status_code ~= 308 - and status_code ~= 404 - and status_code ~= 410 -end - -find_path_loop = function(url, max_repetitions) - local tested = {} - for s in string.gmatch(urlparse.unescape(url), "([^/]+)") do - s = string.lower(s) - if not tested[s] then - if s == "" then - tested[s] = -2 - else - tested[s] = 0 - end - end - tested[s] = tested[s] + 1 - if tested[s] == max_repetitions then - return true - end - end - return false -end - -percent_encode_url = function(url) - temp = "" - for c in string.gmatch(url, "(.)") do - local b = string.byte(c) - if b < 32 or b > 126 then - c = string.format("%%%02X", b) - end - temp = temp .. c - end - return temp -end - -queue_url = function(url, withcustom) - if not url then - return nil - end - queue_new_urls(url) - if not string.match(url, "^https?://[^/]+%.") then - return nil - end ---local original = url - load_setting_depth = function(s) - n = tonumber(current_settings[s]) - if n == nil then - n = 0 - end - return n - 1 - end - url = string.gsub(url, "'%s*%+%s*'", "") - url = percent_encode_url(url) - url = string.match(url, "^([^{]+)") - url = string.match(url, "^([^<]+)") - url = string.match(url, "^([^\\]+)") - if current_settings and current_settings["all"] and withcustom then - local depth = load_setting_depth("depth") - local keep_random = load_setting_depth("keep_random") - local keep_all = load_setting_depth("keep_all") - local any_domain = load_setting_depth("any_domain") - if depth >= 0 then - local random = current_settings["random"] - local all = current_settings["all"] - if keep_random < 0 or random == "" then - random = nil - keep_random = nil - end - if keep_all < 0 or all == 0 then - all = nil - keep_all = nil - end - if any_domain <= 0 then - any_domain = nil - end - local settings = { - depth=depth, - all=all, - keep_all=keep_all, - random=random, - keep_random=keep_random, - url=url, - any_domain=any_domain - } - url = "custom:" - for _, k in pairs( - {"all", "any_domain", "depth", "keep_all", "keep_random", "random", "url"} - ) do - local v = settings[k] - if v ~= nil then - url = url .. k .. "=" .. urlparse.escape(tostring(v)) .. "&" - end - end - url = string.sub(url, 1, -2) - end - end - if not duplicate_urls[url] and not queued_urls[url] then - if find_path_loop(url, 2) then - return false - end ---print("queuing",original, url) - queued_urls[url] = true - end -end - -queue_monthly_url = function(url) - local random_s = os.date("%Y%m", timestamp) - url = percent_encode_url(url) - queued_urls["custom:random=" .. random_s .. "&url=" .. urlparse.escape(tostring(url))] = true -end - -remove_param = function(url, param_pattern) - local newurl = url - repeat - url = newurl - newurl = string.gsub(url, "([%?&;])" .. param_pattern .. "=[^%?&;]*[%?&;]?", "%1") - until newurl == url - return string.match(newurl, "^(.-)[%?&;]?$") -end - -queue_new_urls = function(url) - if not url then - return nil - end - local newurl = string.gsub(url, "([%?&;])[aA][mM][pP];", "%1") - if url == current_url then - if newurl ~= url then - queue_url(newurl) - end - end - for _, param_pattern in pairs(bad_params) do - newurl = remove_param(newurl, param_pattern) - end - if newurl ~= url then - queue_url(newurl) - end - newurl = string.match(newurl, "^([^%?&]+)") - if newurl ~= url then - queue_url(newurl) - end - url = string.gsub(url, """, '"') - url = string.gsub(url, "&", "&") - for newurl in string.gmatch(url, '([^"\\]+)') do - if newurl ~= url then - queue_url(newurl) - end - end -end - -report_bad_url = function(url) - if current_url ~= nil then - bad_urls[current_url] = true - else - bad_urls[string.lower(url)] = true - end -end - -strip_url = function(url) - url = string.match(url, "^https?://(.+)$") - newurl = string.match(url, "^www%.(.+)$") - if newurl then - url = newurl - end - return url -end - -wget.callbacks.download_child_p = function(urlpos, parent, depth, start_url_parsed, iri, verdict, reason) - local url = urlpos["url"]["url"] - local parenturl = parent["url"] - local extract_page_requisites = false - - local current_settings_all = current_settings and current_settings["all"] - local current_settings_any_domain = current_settings and current_settings["any_domain"] - - --queue_monthly_url(string.match(url, "^(https?://[^/]+)") .. "/") - - if redirect_urls[parenturl] and not ( - status_code == 300 and string.match(parenturl, "^https?://[^/]*feb%-web%.ru/") - ) then - return true - end - - if find_path_loop(url, 2) then - return false - end - - local _, count = string.gsub(url, "[/%?]", "") - if count >= 16 then - return false - end - - for _, extension in pairs({ - "pdf", - "doc[mx]?", - "xls[mx]?", - "ppt[mx]?", - "zip", - "odt", - "odm", - "ods", - "odp", - "xml", - "json", - "torrent" - }) do - if string.match(parenturl, "%." .. extension .. "$") - or string.match(parenturl, "%." .. extension .. "[^a-z0-9A-Z]") - or string.match(parenturl, "%." .. string.upper(extension) .. "$") - or string.match(parenturl, "%." .. string.upper(extension) .. "[^a-z0-9A-Z]") then - return false - end - if string.match(url, "%." .. extension .. "$") - or string.match(url, "%." .. extension .. "[^a-z0-9A-Z]") - or string.match(url, "%." .. string.upper(extension) .. "$") - or string.match(url, "%." .. string.upper(extension) .. "[^a-z0-9A-Z]") then - queue_url(url) - return false - end - end - - local domain_match = checked_domains[item_first_url] - if not domain_match then - domain_match = check_domain_outlinks(item_first_url) - if not domain_match then - domain_match = "none" - end - checked_domains[item_first_url] = domain_match - end - if domain_match ~= "none" then - extract_page_requisites = true - local newurl_domain = string.match(url, "^https?://([^/]+)") - local to_queue = true - for domain, _ in pairs(redirect_domains) do - if check_domain_outlinks(url, domain) then - to_queue = false - break - end - end - if to_queue then - queue_url(url) - return false - end - end - - --[[if not extract_page_requisites then - return false - end]] - - if (status_code < 200 or status_code >= 300 or not verdict) - and not current_settings_all then - return false - end - - --[[if string.len(url) == string.len(parenturl) then - local good_url = false - local index1, index2 - temp_url = string.match(url, "^https?://(.+)$") - temp_parenturl = string.match(parenturl, "^https?://(.+)$") - local start_index = 1 - repeat - index1 = string.find(temp_url, "/", start_index) - index2 = string.find(temp_parenturl, "/", start_index) - if index1 ~= index2 then - good_url = true - break - end - if index1 then - start_index = index1 + 1 - end - until not index1 or not index2 - if not good_url then - return false - end - end]] - - if parenturl_uuid == nil then - parenturl_uuid = false - for old_parent_url, _ in pairs(visited_urls) do - for id_to_ignore, _ in pairs(ids_to_ignore) do - if string.match(old_parent_url, id_to_ignore) then - parenturl_uuid = true - break - end - end - if parenturl_uuid then - break - end - end - end - if parenturl_uuid then - for id_to_ignore, _ in pairs(ids_to_ignore) do - if string.match(url, id_to_ignore) and not current_settings_all then - return false - end - end - end - - if urlpos["link_refresh_p"] ~= 0 then - queue_url(url) - return false - end - - if parenturl_requisite == nil then - parenturl_requisite = false - for _, pattern in pairs(page_requisite_patterns) do - for old_parent_url, _ in pairs(visited_urls) do - if string.match(old_parent_url, pattern) then - parenturl_requisite = true - break - end - end - if parenturl_requisite then - break - end - end - end - if parenturl_requisite and not current_settings_all then - return false - end - - if urlpos["link_inline_p"] ~= 0 then - queue_url(url) - return false - end - - local current_host = string.match(urlpos["url"]["host"], "([^%.]+%.[^%.]+)$") - local first_parent_host = string.match(parent["host"], "([^%.]+%.[^%.]+)$") - - if current_url then - first_parent_host = string.match(current_url .. "/", "^https?://[^/]-([^/%.]+%.[^/%.]+)/") - end - - if current_settings_all and ( - current_settings_any_domain - or first_parent_host == current_host - ) then - queue_url(url, true) - return false - end - - --[[for old_parent_url, _ in pairs(visited_urls) do - for _, pattern in pairs(page_requisite_patterns) do - if string.match(old_parent_url, pattern) then - return false - end - end - end - - for _, pattern in pairs(page_requisite_patterns) do - if string.match(url, pattern) then - queue_url(url) - return false - end - end]] -end - -wget.callbacks.get_urls = function(file, url, is_css, iri) - local html = nil - - if url then - downloaded[url] = true - end - - local function check(url, headers) - local url = string.match(url, "^([^#]+)") - url = string.gsub(url, "&", "&") - queue_url(url) - end - - local function checknewurl(newurl, headers) - if string.match(newurl, "^#") then - return nil - end - if string.match(newurl, "\\[uU]002[fF]") then - return checknewurl(string.gsub(newurl, "\\[uU]002[fF]", "/"), headers) - end - if string.match(newurl, "^https?:////") then - check(string.gsub(newurl, ":////", "://"), headers) - elseif string.match(newurl, "^https?://") then - check(newurl, headers) - elseif string.match(newurl, "^https?:\\/\\?/") then - check(string.gsub(newurl, "\\", ""), headers) - elseif not url then - return nil - elseif string.match(newurl, "^\\/") then - checknewurl(string.gsub(newurl, "\\", ""), headers) - elseif string.match(newurl, "^//") then - check(urlparse.absolute(url, newurl), headers) - elseif string.match(newurl, "^/") then - check(urlparse.absolute(url, newurl), headers) - elseif string.match(newurl, "^%.%./") then - if string.match(url, "^https?://[^/]+/[^/]+/") then - check(urlparse.absolute(url, newurl), headers) - else - checknewurl(string.match(newurl, "^%.%.(/.+)$"), headers) - end - elseif string.match(newurl, "^%./") then - check(urlparse.absolute(url, newurl), headers) - end - end - - local function checknewshorturl(newurl, headers) - if string.match(newurl, "^#") then - return nil - end - if url and string.match(newurl, "^%?") then - check(urlparse.absolute(url, newurl), headers) - elseif url and not (string.match(newurl, "^https?:\\?/\\?//?/?") - or string.match(newurl, "^[/\\]") - or string.match(newurl, "^%./") - or string.match(newurl, "^[jJ]ava[sS]cript:") - or string.match(newurl, "^[mM]ail[tT]o:") - or string.match(newurl, "^vine:") - or string.match(newurl, "^android%-app:") - or string.match(newurl, "^ios%-app:") - or string.match(newurl, "^%${")) then - check(urlparse.absolute(url, newurl), headers) - else - checknewurl(newurl, headers) - end - end - - if (status_code == 200 and current_settings and current_settings["deep_extract"]) - or not url then - html = read_file(file) - if not url then - html = string.gsub(html, " ", " ") - html = string.gsub(html, "<", "<") - html = string.gsub(html, ">", ">") - html = string.gsub(html, """, '"') - html = string.gsub(html, "'", "'") - html = string.gsub(html, "&#(%d+);", - function(n) - return string.char(n) - end - ) - html = string.gsub(html, "&#x(%d+);", - function(n) - return string.char(tonumber(n, 16)) - end - ) - local temp_html = string.gsub(html, "\n", "") - for _, remove in pairs({"", "
", "]*>"}) do - if remove ~= "" then - temp_html = string.gsub(temp_html, remove, "") - end - for newurl in string.gmatch(temp_html, "(https?://[^%s<>#\"'\\`{})%]]+)") do - while string.match(newurl, "[%.&,!;]$") do - newurl = string.match(newurl, "^(.+).$") - end - check(newurl) - end - end - end - for newurl in string.gmatch(html, "[^%-][hH][rR][eE][fF]='([^']+)'") do - checknewshorturl(newurl) - end - for newurl in string.gmatch(html, '[^%-][hH][rR][eE][fF]="([^"]+)"') do - checknewshorturl(newurl) - end - for newurl in string.gmatch(string.gsub(html, "&[qQ][uU][oO][tT];", '"'), '"(https?://[^"]+)') do - checknewurl(newurl) - end - for newurl in string.gmatch(string.gsub(html, "'", "'"), "'(https?://[^']+)") do - checknewurl(newurl) - end - if url then - for newurl in string.gmatch(html, ">%s*([^<%s]+)") do - checknewurl(newurl) - end - end - --[[for newurl in string.gmatch(html, "%(([^%)]+)%)") do - checknewurl(newurl) - end]] - elseif string.match(url, "^https?://[^/]+/.*[^a-z0-9A-Z][pP][dD][fF]$") - or string.match(url, "^https?://[^/]+/.*[^a-z0-9A-Z][pP][dD][fF][^a-z0-9A-Z]") - or string.match(read_file(file, 4), "%%[pP][dD][fF]") then - io.stdout:write("Extracting links from PDF.\n") - io.stdout:flush() - local temp_file = file .. "-html.html" - local check_file = io.open(temp_file) - if check_file then - check_file:close() - os.remove(temp_file) - end - os.execute("pdftohtml -nodrm -hidden -i -s -q " .. file) - check_file = io.open(temp_file) - if check_file then - check_file:close() - local temp_length = table_length(queued_urls) - wget.callbacks.get_urls(temp_file, nil, nil, nil) - io.stdout:write("Found " .. tostring(table_length(queued_urls)-temp_length) .. " URLs.\n") - io.stdout:flush() - os.remove(temp_file) - else - io.stdout:write("Not a PDF.\n") - io.stdout:flush() - end - end -end - -wget.callbacks.write_to_warc = function(url, http_stat) - local url_lower = string.lower(url["url"]) - if urls[url_lower] then - current_url = url_lower - current_settings = urls_settings[url_lower] - end - if current_settings and not current_settings["random"] then - queue_url(url["url"]) - return false - end - if bad_code(http_stat["statcode"]) then - return false - elseif http_stat["statcode"] >= 300 and http_stat["statcode"] <= 399 then - local newloc = urlparse.absolute(url["url"], http_stat["newloc"]) - if string.match(newloc, "^https?://[^/]*google%.com/sorry") - or string.match(newloc, "^https?://[^/]*google%.com/[sS]ervice[lL]ogin") - or string.match(newloc, "^https?://consent%.youtube%.com/") - or string.match(newloc, "^https?://consent%.google%.com/") - or string.match(newloc, "^https?://misuse%.ncbi%.nlm%.nih%.gov/") - or string.match(newloc, "^https?://myprivacy%.dpgmedia%.nl/") - or string.match(newloc, "^https?://idp%.springer%.com/authorize%?") - or string.match(newloc, "^https?://[^/]*instagram%.com/accounts/") then - report_bad_url(url["url"]) - exit_url = true - return false - end - return true - elseif http_stat["statcode"] ~= 200 then - return true - end - if true then - return true - end - if http_stat["len"] > min_dedup_mb * 1024 * 1024 then - io.stdout:write("Data larger than " .. tostring(min_dedup_mb) .. " MB. Checking with Wayback Machine.\n") - io.stdout:flush() - while true do - local body, code, headers, status = http.request( - "https://web.archive.org/__wb/calendarcaptures/2" - .. "?url=" .. urlparse.escape(url["url"]) - .. "&date=202" - ) - if code ~= 200 then - io.stdout:write("Got " .. tostring(code) .. " from the Wayback Machine.\n") - io.stdout:flush() - os.execute("sleep 10") - else - data = JSON:decode(body) - if not data["items"] or not data["colls"] then - return true - end - for _, item in pairs(data["items"]) do - if item[2] == 200 then - local coll_id = item[3] + 1 - if not coll_id then - io.stdout:write("Could get coll ID.\n") - io.stdout:flush() - end - local collections = data["colls"][coll_id] - if not collections then - io.stdout:write("Could not get collections.\n") - io.stdout:flush() - end - for _, collection in pairs(collections) do - if collection == "archivebot" - or string.find(collection, "archiveteam") then - io.stdout:write("Archive Team got this URL before.\n") - return false - end - end - end - end - break - end - end - end - return true -end - -wget.callbacks.httploop_result = function(url, err, http_stat) - status_code = http_stat["statcode"] - - parenturl_uuid = nil - parenturl_requisite = nil - - local url_lower = string.lower(url["url"]) - if urls[url_lower] then - current_url = url_lower - current_settings = urls_settings[url_lower] - end - - if not timestamp then - local body, code, headers, status = http.request("https://legacy-api.arpa.li/now") - assert(code == 200) - timestamp = tonumber(string.match(body, "^([0-9]+)")) - end - - - if status_code ~= 0 then - local base_url = string.match(url["url"], "^(https://[^/]+)") - if base_url then - for _, newurl in pairs({ - base_url .. "/robots.txt", - base_url .. "/favicon.ico", - base_url .. "/" - }) do - queue_monthly_url(newurl) - end - end - end - - url_count = url_count + 1 - io.stdout:write(url_count .. "=" .. status_code .. " " .. url["url"] .. " \n") - io.stdout:flush() - - if redirect_domains["done"] then - redirect_domains = {} - redirect_urls = {} - visited_urls = {} - item_first_url = nil - end - redirect_domains[string.match(url["url"], "^https?://([^/]+)")] = true - if not item_first_url then - item_first_url = url["url"] - end - - visited_urls[url["url"]] = true - - if exit_url then - exit_url = false - return wget.actions.EXIT - end - - if status_code >= 300 and status_code <= 399 then - local newloc = urlparse.absolute(url["url"], http_stat["newloc"]) - redirect_urls[url["url"]] = true - --[[if strip_url(url["url"]) == strip_url(newloc) then - queued_urls[newloc] = true - return wget.actions.EXIT - end]] - if downloaded[newloc] then - return wget.actions.EXIT - elseif string.match(url["url"], "^https?://[^/]*telegram%.org/dl%?tme=") - or ( - string.match(newloc, "^https?://www%.(.+)") - or string.match(newloc, "^https?://(.+)") - ) == ( - string.match(url["url"], "^https?://www%.(.+)") - or string.match(url["url"], "^https?://(.+)") - ) - or status_code == 301 - or status_code == 308 then - queue_url(newloc) - return wget.actions.EXIT - end - else - redirect_domains["done"] = true - end - - if downloaded[url["url"]] then - report_bad_url(url["url"]) - return wget.actions.EXIT - end - - for _, pattern in pairs(ignore_patterns) do - if string.match(url["url"], pattern) then - return wget.actions.EXIT - end - end - - if status_code >= 200 and status_code <= 399 then - downloaded[url["url"]] = true - end - - if status_code >= 200 and status_code < 300 then - queue_new_urls(url["url"]) - end - - if bad_code(status_code) then - io.stdout:write("Server returned " .. http_stat.statcode .. " (" .. err .. ").\n") - io.stdout:flush() - report_bad_url(url["url"]) - return wget.actions.EXIT - end - - local sleep_time = 0 - - if sleep_time > 0.001 then - os.execute("sleep " .. sleep_time) - end - - return wget.actions.NOTHING -end - -wget.callbacks.finish = function(start_time, end_time, wall_time, numurls, total_downloaded_bytes, total_download_time) - local function submit_backfeed(newurls) - local tries = 0 - local maxtries = 4 - while tries < maxtries do - local body, code, headers, status = http.request( - "https://legacy-api.arpa.li/backfeed/legacy/urls-glx7ansh4e17aii", - newurls .. "\0" - ) - print(body) - if code == 200 then - io.stdout:write("Submitted discovered URLs.\n") - io.stdout:flush() - break - end - io.stdout:write("Failed to submit discovered URLs." .. tostring(code) .. tostring(body) .. "\n") - io.stdout:flush() - os.execute("sleep " .. math.floor(math.pow(2, tries))) - tries = tries + 1 - end - if tries == maxtries then - abortgrab = true - end - end - - local newurls = nil - local is_bad = false - local count = 0 - local dup_urls = io.open(item_dir .. "/" .. warc_file_base .. "_duplicate-urls.txt", "w") - for url, _ in pairs(queued_urls) do - for _, pattern in pairs(bad_patterns) do - is_bad = string.match(url, pattern) - if is_bad then - io.stdout:write("Filtering out URL " .. url .. ".\n") - io.stdout:flush() - break - end - end - if not is_bad then - io.stdout:write("Queuing URL " .. url .. ".\n") - io.stdout:flush() - dup_urls:write(url .. "\n") - if newurls == nil then - newurls = url - else - newurls = newurls .. "\0" .. url - end - count = count + 1 - if count == 100 then - submit_backfeed(newurls) - newurls = nil - count = 0 - end - end - end - if newurls ~= nil then - submit_backfeed(newurls) - end - dup_urls:close() - - local file = io.open(item_dir .. "/" .. warc_file_base .. "_bad-urls.txt", "w") - for url, _ in pairs(bad_urls) do - file:write(url .. "\n") - end - file:close() -end - -wget.callbacks.before_exit = function(exit_status, exit_status_string) - if abortgrab then - return wget.exits.IO_FAIL - end - return exit_status -end - diff --git a/user-agents.txt b/user-agents.txt deleted file mode 100644 index b78e5a4..0000000 --- a/user-agents.txt +++ /dev/null @@ -1,381 +0,0 @@ -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:40.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:43.0) Gecko/20100101 Firefox/43.0 SeaMonkey/2.40 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:45.0) Gecko/20100101 Firefox/45.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:47.0) Gecko/20100101 Firefox/47.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:48.0) Gecko/20100101 Firefox/48.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:49.0) Gecko/20100101 Firefox/49.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:50.0) Gecko/20100101 Firefox/50.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:52.0) Gecko/20100101 Firefox/52.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:54.0) Gecko/20100101 Firefox/54.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:56.0) Gecko/20100101 Firefox/56.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:56.0) Gecko/20100101 Firefox/56.0.4 Waterfox/56.0.4 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.3 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.4 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:58.0) Gecko/20100101 Firefox/58.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:59.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:61.0) Gecko/20100101 Firefox/61.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:64.0) Gecko/20100101 Firefox/64.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:65.0) Gecko/20100101 Firefox/65.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:43.0) Gecko/20100101 Firefox/43.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:43.0) Gecko/20100101 Firefox/43.0 SeaMonkey/2.40 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:47.0) Gecko/20100101 Firefox/47.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:48.0) Gecko/20100101 Firefox/48.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:49.0) Gecko/20100101 Firefox/49.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:50.0) Gecko/20100101 Firefox/50.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0 SeaMonkey/2.48 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:52.0) Gecko/20100101 Firefox/52.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:54.0) Gecko/20100101 Firefox/54.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:55.0) Gecko/20100101 Firefox/55.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:56.0) Gecko/20100101 Firefox/56.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:56.0) Gecko/20100101 Firefox/56.0.4 Waterfox/56.0.4 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.3 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.4 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:57.0) Gecko/20100101 Firefox/57.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:58.0) Gecko/20100101 Firefox/58.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:59.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:61.0) Gecko/20100101 Firefox/61.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:49.0) Gecko/20100101 Firefox/49.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:50.0) Gecko/20100101 Firefox/50.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:51.0) Gecko/20100101 Firefox/51.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:52.0) Gecko/20100101 Firefox/52.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:53.0) Gecko/20100101 Firefox/53.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:54.0) Gecko/20100101 Firefox/54.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:56.0) Gecko/20100101 Firefox/56.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:57.0) Gecko/20100101 Firefox/57.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:58.0) Gecko/20100101 Firefox/58.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:59.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:59.0.2) Gecko/20100101 Firefox/59.0.2 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:61.0) Gecko/20100101 Firefox/61.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:61.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:64.0) Gecko/20100101 Firefox/64.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:40.0) Gecko/20100101 Firefox/40.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:48.0) Gecko/20100101 Firefox/48.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:52.0) Gecko/20100101 Firefox/52.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.3 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:55.0) Gecko/20100101 Firefox/55.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0) Gecko/20100101 Firefox/56.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0) Gecko/20100101 Firefox/56.0.4 Waterfox/56.0.4 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.3 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.4 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:57.0) Gecko/20100101 Firefox/57.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:57.0) Gecko/20100101 Firefox/99.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:58.0) Gecko/20100101 Firefox/58.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:59.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:61.0) Gecko/20100101 Firefox/61.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:64.0) Gecko/20100101 Firefox/64.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:65.0) Gecko/20100101 Firefox/65.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.2 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:57.0) Gecko/20100101 Firefox/57.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:61.0) Gecko/20100101 Firefox/61.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:64.0) Gecko/20100101 Firefox/64.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:40.0) Gecko/20100101 Firefox/40.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:45.0) Gecko/20100101 Firefox/45.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:47.0) Gecko/20100101 Firefox/47.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:48.0) Gecko/20100101 Firefox/48.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.8.3 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:44.0) Gecko/20100101 Firefox/44.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:45.0) Gecko/20100101 Firefox/45.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:47.0) Gecko/20100101 Firefox/47.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:48.0) Gecko/20100101 Firefox/48.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:49.0) Gecko/20100101 Firefox/49.0.2.1 Waterfox/49.0.2.1 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:45.0) Gecko/20100101 Firefox/45.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:48.0) Gecko/20100101 Firefox/48.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:56.0) Gecko/20100101 Firefox/56.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:41.0) Gecko/20100101 Firefox/41.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:47.0) Gecko/20100101 Firefox/47.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:48.0) Gecko/20100101 Firefox/48.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:52.0) Gecko/20100101 Firefox/52.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.1 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:55.0) Gecko/20100101 Firefox/55.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:56.0) Gecko/20100101 Firefox/56.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:56.0) Gecko/20100101 Firefox/56.0.1 Waterfox/56.0.1 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:57.0) Gecko/20100101 Firefox/57.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:58.0) Gecko/20100101 Firefox/58.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:59.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:61.0) Gecko/20100101 Firefox/61.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1; rv:50.0) Gecko/20100101 Firefox/50.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1; rv:55.0) Gecko/20100101 Firefox/55.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2; rv:49.0) Gecko/20100101 Firefox/49.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.102 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.1 Safari/605.1.15 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.1 Safari/605.1.15 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.1 Safari/605.1.15 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3639.1 Safari/537.36 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15 -Mozilla/5.0 (Macintosh; Intel Mac OS X 10_29_81; rv:45.70.23) Gecko/20134284 Firefox/45.70.23 -Mozilla/5.0 (Macintosh; Intel Mac OS X 11.11; rv:51.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 9.3; rv:45.0) Gecko/20100101 Firefox/57.0 -Mozilla/5.0 (Macintosh; Intel Mac OS X 9.3; rv:45.0) Gecko/20100101 Firefox/59.0.2 -Mozilla/5.0 (Macintosh; PPC Mac OS X 10.11; rv:46.0) Gecko/20100101 Firefox/46.0 -Mozilla/5.0 (Macintosh; PPC Mac OS X 10.12; rv:46.0) Gecko/20100101 Firefox/46.0 -Mozilla/5.0 (Macintosh; PPC Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0 -Mozilla/5.0 (Macintosh; PPC Mac OS X 10.4; FPR7; rv:45.0) Gecko/20100101 Firefox/45.0 TenFourFox/G5 -Mozilla/5.0 (Macintosh; PPC Mac OS X 10.4; FPR8; rv:45.0) Gecko/20100101 Firefox/45.0 TenFourFox/G5 -Mozilla/5.0 (Macintosh; PPC Mac OS X 10.4; FPR9; rv:45.0) Gecko/20100101 Firefox/45.0 TenFourFox/G5 -Mozilla/5.0 (Macintosh; PPC Mac OS X 10.5; FPR8; rv:45.0) Gecko/20100101 Firefox/45.0 TenFourFox/7450 -Mozilla/5.0 (Macintosh; PPC Mac OS X 10.8; rv:47.0) Gecko/20100101 Firefox/47.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.10; rv:59.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.10; rv:64.0) Gecko/20100101 Firefox/64.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.10; rv:65.0) Gecko/20100101 Firefox/65.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.11; rv:59.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.11; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.11; rv:61.0) Gecko/20100101 Firefox/61.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.11; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.12; rv:54.0) Gecko/20100101 Firefox/54.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.12; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.12; rv:61.0) Gecko/20100101 Firefox/61.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.12; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.13; rv:59.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.13; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 -Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.85 Safari/537.36 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:20.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/45.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 IceDragon/40.1.1.18 Firefox/40.0.2 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0 Framafox/43.0.1 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0 SeaMonkey/2.40 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.63.16) Gecko/20175595 Firefox/45.63.16 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:48.0) Gecko/20100101 Firefox/48.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0 SeaMonkey/2.46 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/45.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/47.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/64.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.9.1 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.1 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.2 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.2 Lightning/5.4 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.3 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.4 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 Zotero/5.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Firefox/52.9 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.6.2 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.7.2 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.8.2 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.8.3 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.1 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.2 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.3 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180927 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.0.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.0.0a2 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.0.1 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.1.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/50.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0 SeaMonkey/2.49.3 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/57.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:57.0) Gecko/20100101 Firefox/57.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:58.0) Gecko/20100101 Firefox/58.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:58.0) Gecko/20100101 Firefox/58.0 IceDragon/58.0.1 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:59.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0 IceDragon/60.0.2 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.9) Gecko/20100101 Goanna/4.1 Firefox/60.9 PaleMoon/28.2.1 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:61.0) Gecko/20100101 Firefox/61.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:61.0) Gecko/20100101 Firefox/61.0 IceDragon/61.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0 IceDragon/62.0.2 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0 -Mozilla/5.0 (Windows NT 10.0; WOW64; rv:65.0) Gecko/20100101 Firefox/65.0 -Mozilla/5.0 (Windows NT 10.0; Win64; rv:54.0) Gecko/20100101 Firefox/54.0 -Mozilla/5.0 (Windows NT 10.0; Win64; rv:55.0) Gecko/20100101 Firefox/55.0 -Mozilla/5.0 (Windows NT 10.0; Win64; rv:59.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Windows NT 10.0; Win64; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Windows NT 10.0; Win64; rv:61.0) Gecko/20100101 Firefox/61.0 -Mozilla/5.0 (Windows NT 10.0; Win64; rv:61.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Windows NT 10.0; Win64; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Windows NT 10.0; Win64; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:41.0) Gecko/20100101 Firefox/41.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:43.0) Gecko/20100101 Firefox/43.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:43.0) Gecko/20100101 Firefox/43.0.4 Waterfox/43.0.4 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:46.0) Gecko/20100101 Firefox/46.0.1 Waterfox/46.0.1 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:49.0) Gecko/20100101 Firefox/49.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:51.0) Gecko/20100101 Firefox/51.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.0.4 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.5.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.5.2 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.7.2 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.7.4 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.8.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.9.1 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0.2 Waterfox/52.0.2 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.3 Firefox/52.9 PaleMoon/27.5.1 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.8.3 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.1 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.2 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.3 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.4 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180424 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180515 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180601 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180718 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180905 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180927 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.0.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.0.0.1 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.0.1 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.1.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0.1 Waterfox/54.0.1 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0.1 Waterfox/56.0.1 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0.4 Waterfox/56.0.4 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.3 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.4 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0 -Mozilla/5.0 (Windows NT 10.0; rv:44.0) Gecko/20100101 Firefox/44.0.1 -Mozilla/5.0 (Windows NT 10.0; rv:45.0) Gecko/20100101 Firefox/45.0 -Mozilla/5.0 (Windows NT 10.0; rv:47.0) Gecko/20100101 Firefox/47.0 -Mozilla/5.0 (Windows NT 10.0; rv:49.0) Gecko/20100101 Firefox/49.0 -Mozilla/5.0 (Windows NT 10.0; rv:50.0) Gecko/20100101 Firefox/50.0 -Mozilla/5.0 (Windows NT 10.0; rv:51.0) Gecko/20100101 Firefox/51.0 -Mozilla/5.0 (Windows NT 10.0; rv:52.0) Gecko/20100101 Firefox/52.0 -Mozilla/5.0 (Windows NT 10.0; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.7.2 -Mozilla/5.0 (Windows NT 10.0; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.9.1 -Mozilla/5.0 (Windows NT 10.0; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.4 -Mozilla/5.0 (Windows NT 10.0; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.1 -Mozilla/5.0 (Windows NT 10.0; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.1a1 -Mozilla/5.0 (Windows NT 10.0; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.3 -Mozilla/5.0 (Windows NT 10.0; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.1.0 -Mozilla/5.0 (Windows NT 10.0; rv:53.0) Gecko/20100101 Firefox/53.0 -Mozilla/5.0 (Windows NT 10.0; rv:55.0) Gecko/20100101 Firefox/55.0 -Mozilla/5.0 (Windows NT 10.0; rv:56.0) Gecko/20100101 Firefox/56.0 -Mozilla/5.0 (Windows NT 10.0; rv:57.0) Gecko/20100101 Firefox/57.0 -Mozilla/5.0 (Windows NT 10.0; rv:58.0) Gecko/20100101 Firefox/58.0 -Mozilla/5.0 (Windows NT 10.0; rv:59.0) Gecko/20100101 Firefox/59.0 -Mozilla/5.0 (Windows NT 10.0; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Windows NT 10.0; rv:61.0) Gecko/20100101 Firefox/61.0 -Mozilla/5.0 (Windows NT 10.0; rv:62.0) Gecko/20100101 Firefox/62.0 -Mozilla/5.0 (Windows NT 10.0; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Windows NT 10.0; rv:64.0) Gecko/20100101 Firefox/64.0 -Mozilla/5.0 (Windows NT 4.0; rv:52.0) Gecko/20100101 Firefox/52.0 -Mozilla/5.0 (Windows NT 5.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0 -Mozilla/5.0 (Windows NT 5.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 -Mozilla/5.0 (Windows NT 5.1; WOW64; rv:61.0) Gecko/20100101 Firefox/61.0 -Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 -Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 -Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 -Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (Windows NT 6.1; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0 -Mozilla/5.0 (Windows NT 6.1; WOW64; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36 -Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 -Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 -Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 -Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36 -Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 -Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 -Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0 -Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (Windows NT 6.1; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 -Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (X11; CrOS x86_64 11021.81.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36 -Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36 -Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36 -Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 -Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36 -Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36 -Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36 -Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36 -Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36 -Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/70.0.3538.77 Chrome/70.0.3538.77 Safari/537.36 -Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0 -Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0 -Mozilla/5.0 (X11; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0 -Mozilla/5.0 (X11; OpenBSD amd64; rv:56.0) Gecko/20100101 Firefox/66.0 -Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0 -Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0