This commit is contained in:
TheTechRobo 2022-05-01 18:51:51 -04:00
parent b5b36df745
commit 0bec576ea8
13 changed files with 215 additions and 3899 deletions

1053
JSON.lua

File diff suppressed because it is too large Load Diff

7
Makefile Normal file
View File

@ -0,0 +1,7 @@
# Neither target produces a file of the same name, so mark them phony;
# otherwise a stray file called `run` or `clean` would silently disable them.
.PHONY: run clean

# Clean, rebuild the Docker image tagged `img`, then run it with the
# argument `test` in a throw-away container (--rm).
run:
	$(MAKE) clean
	docker build -t img .
	docker run --rm img test

# Removes a filesystem path named `img`.
# NOTE(review): this does not remove the Docker image tagged `img` -- confirm
# whether `docker rmi img` was intended.
clean:
	rm -rf img

View File

@ -1,64 +0,0 @@
utm_source
utm_medium
utm_campaign
utm_term
utm_content
utm_adgroup
ref
refsrc
referrer_id
referrerid
src
i
s
ts
feature
jsessionid
phpsessid
aspsessionid
sessionid
zenid
sid
gclid
fb_xd_fragment
fb_comment_id
fbclid
cfid
cftoken
doing_wp_cron
pk_cpn
pk_campaign
pk_kwd
pk_keyword
piwik_campaign
piwik_kwd
ga_source
ga_medium
ga_term
ga_content
ga_campaign
ga_place
yclid
_openstat
fb_action_ids
fb_action_types
fb_source
fb_ref
action_object_map
action_type_map
action_ref_map
gs_l
mkt_tok
hmb_campaign
hmb_medium
hmb_source
rand
wicket:antiCache
cachebuster
nocache
vs
dilid
script_case_session
cid
extid
_flowexecutionkey

View File

@ -1,33 +0,0 @@
/action/consumeSharedSessionAction
/action/consumeSsoCookie
/action/getSharedSiteSession
/juris/error%.jsf
facebook%.com/login%.php
facebook%.com/cookie/
facebook%.com/plugins/
facebook%.com/sharer/
facebook%.com/sharer%.php
gongquiz%.com.+&historyNo=[0-9]+
univis%.univie%.ac%.at/ausschreibungstellensuche/
fundraise%.cancerresearchuk%.org/signup/account/
mma%.ft%.com
^https?://dmg%.go%-2b%-planer%.de/
^https?://3d%.espace%-aubade%.fr/
^https?://kuechenplaner%.[^/]+/cloud/
^https?://3d%-salledebains%.geberit%.fr/
^https?://bibliotekanauki%.ceon%.pl/yadda/search/general%.action
^https?://[^/]+%.icm%.edu%.pl/.*search/article%.action
^https?://interamt%.de/koop/app/
^https?://tesiunam%.dgb%.unam%.mx/F/
^https?://[^%.]+%.sedelectronica%.es/.*%?x=
^https?://www%.cp%-cc%.org/programs%-services/
/ibank/_crypt_
%%7B%%7B.+%%7D%%7D
^https?://[^/]+/"
^http://[0-9a-z][0-9a-z][0-9a-z][0-9][0-9][0-9]?%.[^%./]+%.com/$
^http://[0-9a-z][0-9a-z][0-9a-z][0-9][0-9][0-9]?%.[^%./]+%.com/[a-z]+%.?[a-z][a-z][a-z]?$
^http://[0-9a-z][0-9a-z][0-9a-z][0-9][0-9][0-9]?%.[^%./]+%.com/[a-z]+/[a-z]+[0-9]*%.?[a-z][a-z][a-z]?$
^https?://[^/]*yahoo%.com/.+%%5C.+at%.atwola%.com
^https?://[^/]*at%.atwola%.com/
^https?://www%.bafa%.de/
%%5C%%22

1
boilerplate.py Normal file
View File

@ -0,0 +1 @@

File diff suppressed because it is too large Load Diff

122
gmd.lua Normal file
View File

@ -0,0 +1,122 @@
-- Sample input fed to GMD.comments.parse at the bottom of this file.
-- Records are separated by "|", fields inside a record by "~" (alternating
-- key, value), and the string ends with a "#73:0:10" suffix.
-- NOTE(review): presumably a captured server response kept for manual testing -- confirm.
strin = "2~NzUwMCBzdGFycyBjOg==~4~3~9~1 month~6~1803945|2~SSBiZWF0IDYgaW5zYW5lIGRlbW9ucyBpbiAyNCBob3VycyBsbWFvOiBOZWNyb3BvbGlzLCBUaGUgQ2F2ZXJucyBJSSwgRWxlbWVudHMgWCwgWCBBZHZlbnR1cmUsIFNhZGlzbSwgYW5kIEJsYXN0ZXIgYzo=~4~21~9~8 months~6~1793260|2~L1wvXC9cIDwz~4~6~9~1 year~6~1785414|2~U2VudCBmcm9tIGlPUyBTaG9ydGN1dHMh~4~8~9~1 year~6~1776426|2~VGhpcyBjb21tZW50IHdhcyB1cGxvYWRlZCBmb3IgdGhlIEdEIERvY3Mh~4~5~9~1 year~6~1772719|2~VGhlIHRyaWxvZ3kgaGFzIGJlZW4gY29tcGxldGVkLi4uR0cgQWZ0ZXJtYXRoIQ==~4~8~9~1 year~6~1766450|2~Im93byIgLSBGb3VuZG15YmFsbA==~4~4~9~1 year~6~1766338|2~NTAwMCBzdGFycyE=~4~12~9~2 years~6~1756926|2~Qmxvb2RiYXRoIEdHISEh~4~24~9~2 years~6~1745624|2~QWxsZWdpYW5jZSAxMDAl~4~3~9~2 years~6~1744292#73:0:10"
-- https://stackoverflow.com/questions/40149617/split-string-with-specified-delimiter-in-lua
--- Split a string into its non-empty pieces.
-- `sep` is inserted verbatim into a Lua character class, so every character
-- it contains acts as a delimiter and pattern-magic characters keep their
-- pattern meaning. Runs of delimiters produce no empty fields.
-- @param s   string to split
-- @param sep delimiter character(s); defaults to a single space
-- @return array of the pieces, in order of appearance
function split(s, sep)
  local delimiter = sep or " "
  local parts = {}
  for piece in string.gmatch(s, string.format("([^%s]+)", delimiter)) do
    parts[#parts + 1] = piece
  end
  return parts
end
-- https://stackoverflow.com/questions/40149617/split-string-with-specified-delimiter-in-lua
--
-- Global namespace table for this script; the "comments" sub-table receives
-- the comment helpers (see GMD["comments"]["parse"] further down).
GMD = {}
GMD["comments"] = {}
--- Render a value as Lua-like assignment text for debugging.
-- For a non-table `t` the result is `name = <serialized value>`. For a table
-- every entry is written recursively as `[key] = value;` lines; a table met a
-- second time is emitted as `{}` with a "(self reference)" note, and the real
-- assignment is appended after the main body.
-- Note: this adds a `show` field to the standard `table` library.
-- @param t      value to render
-- @param name   label used on the left-hand side; defaults to "__unnamed__"
-- @param indent prefix string for the first level; defaults to ""
-- @return the rendered string
function table.show(t, name, indent)
local cart -- a container
local autoref -- for self references
--[[ counts the number of elements in a table
local function tablecount(t)
local n = 0
for _, _ in pairs(t) do n = n+1 end
return n
end
]]
-- (RiciLake) returns true if the table is empty
local function isemptytable(t) return next(t) == nil end
-- Turn a scalar (or function) into its textual form: numbers and booleans
-- are written bare, everything else is quoted with %q.
local function basicSerialize (o)
local so = tostring(o)
if type(o) == "function" then
local info = debug.getinfo(o, "S")
-- info.name is nil because o is not a calling level
if info.what == "C" then
return string.format("%q", so .. ", C function")
else
-- the information is defined through lines
return string.format("%q", so .. ", defined in (" ..
info.linedefined .. "-" .. info.lastlinedefined ..
")" .. info.source)
end
elseif type(o) == "number" or type(o) == "boolean" then
return so
else
return string.format("%q", so)
end
end
-- Append the rendering of `value` to `cart`. `name` is the full access path
-- (used for self-reference fix-ups), `field` is what is printed on this line,
-- and `saved` maps already-visited tables to the path they were first seen at.
local function addtocart (value, name, indent, saved, field)
indent = indent or ""
saved = saved or {}
field = field or name
cart = cart .. indent .. field
if type(value) ~= "table" then
cart = cart .. " = " .. basicSerialize(value) .. ";\n"
else
if saved[value] then
cart = cart .. " = {}; -- " .. saved[value]
.. " (self reference)\n"
autoref = autoref .. name .. " = " .. saved[value] .. ";\n"
else
saved[value] = name
--if tablecount(value) == 0 then
if isemptytable(value) then
cart = cart .. " = {};\n"
else
cart = cart .. " = {\n"
-- pairs() order is unspecified, so the entry order in the output is too
for k, v in pairs(value) do
k = basicSerialize(k)
local fname = string.format("%s[%s]", name, k)
field = string.format("[%s]", k)
-- three spaces between levels
addtocart(v, fname, indent .. " ", saved, field)
end
cart = cart .. indent .. "};\n"
end
end
end
end
name = name or "__unnamed__"
if type(t) ~= "table" then
return name .. " = " .. basicSerialize(t)
end
cart, autoref = "", ""
addtocart(t, name, indent)
return cart .. autoref
end
--- Parse a raw comment string into a table.
-- The input is first split on ":"; the first piece is treated as the comment
-- payload and the second as the account payload. The comment payload is then
-- split on "|" into records, and each record on "~" into alternating
-- key/value fields.
-- NOTE(review): with the sample string in this file the trailing "#73:0:10"
-- is not stripped, so the last record's final value still carries "#73" and
-- `account` becomes "0" -- confirm whether the "#" suffix should be split off.
-- @param comment raw string to parse
-- @return table with fields:
--   comment         raw text before the first ":" (nil for empty input)
--   account         raw text between the first and second ":" (may be nil)
--   parsed.comment  array of records, each a key -> value table of strings
GMD["comments"]["parse"] = function(comment)
  local splitted = split(comment, ":")
  local retern = {}
  retern.comment = splitted[1]
  retern.account = splitted[2]
  retern.parsed = {}
  retern.parsed.comment = {}
  -- Empty input leaves retern.comment nil; treat it as "no records"
  -- instead of passing nil into split().
  local data = split(retern.comment or "", "|")
  for i = 1, #data do
    local record = {}
    -- `key` was previously an accidental global; keep it local per record.
    local key
    local ndata = split(data[i], "~")
    for j = 1, #ndata do
      if j % 2 ~= 0 then
        -- odd positions are keys
        key = ndata[j]
      else
        -- even positions are the value for the preceding key
        record[key] = ndata[j]
      end
    end
    retern.parsed.comment[i] = record
  end
  -- print("DONE")
  -- print(table.show(retern.parsed.comment))
  return retern
end
-- Run the parser on the sample string. The parser returns a table, so print()
-- shows only the table's address; wrap the result in table.show(...) to see
-- the parsed fields.
print(GMD["comments"]["parse"](strin))

1
grab.lua Normal file
View File

@ -0,0 +1 @@

View File

@ -1,21 +0,0 @@
[%?&]ver=[0-9a-zA-Z%.]*%.16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]
[%?&]ver=16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]
[%?&]t=16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$
[%?&]t=16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]%.[0-9]+$
[%?&]hash=16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$
%?16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$
%?16[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$
%?6[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$
%?v=[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]$
;extid=[0-9a-f]+$
[%?&;]_flowexecutionkey=
[%?&;]sid=
[%?&;]cid=
[%?&;]jsessionid=
[%?&;]script_case_session=
[%?&;]Dilid=
[%?&;][pP][hH][pP][sS][eE][sS][sS][iI][dD]=
[%?&;]wtd=
[%?&;]nonce=
[%?&;]rnd=
^https?://[^/]+/index%.php%?s=

View File

@ -1,17 +0,0 @@
%.apng
%.avif
%.gif
%.jpe?g
%.jfif
%.pjpeg
%.pjp
%.png
%.svg
%.webp
%.bmp
%.ico
%.cur
%.tif
%.tiff
%.js
%.css

View File

@ -1,50 +1,41 @@
# encoding=utf8
import datetime
from distutils.version import StrictVersion
import hashlib
import json
import os
import random
import shutil
import socket
import subprocess
import sys
import threading
import time
import string
import sys
###################
###GEOMETRY DASH###
###GRAB SCRIPTS####
###################
if sys.version_info[0] < 3:
from urllib import unquote
from urlparser import parse_qs
else:
from urllib.parse import unquote, parse_qs
# Based heavily off of ArchiveTeam/urls-grab
import requests
import seesaw
from seesaw.config import realize, NumberConfigValue
from seesaw.project import *
from seesaw.tracker import *
from seesaw.util import *
from seesaw.pipeline import Pipeline
from seesaw.externalprocess import WgetDownload
from seesaw.item import ItemInterpolation, ItemValue
from seesaw.pipeline import Pipeline
from seesaw.project import Project
from seesaw.task import SimpleTask, LimitConcurrent
from seesaw.tracker import GetItemFromTracker, PrepareStatsForTracker, \
UploadWithTracker, SendDoneToTracker
from seesaw.util import find_executable
import zstandard
if StrictVersion(seesaw.__version__) < StrictVersion('0.8.5'):
raise Exception('This pipeline needs seesaw version 0.8.5 or higher.')
LOCK = threading.Lock()
import hashlib
import shutil
import socket
import sys
project = Project(
title = "Geometry Dash",
project_html = """
<h2>Geometry Dash</h2>
<p>Time to archive Geometry Dash?</p>
""",
)
###########################################################################
# Find a useful Wget+Lua executable.
# The version number of this pipeline definition.
#
# WGET_AT will be set to the first path that
# 1. does not crash with --version, and
# 2. prints the required version string
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
VERSION = '20220428.01'
#USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36'
TRACKER_ID = 'geometrytrash'
TRACKER_HOST = '172.17.0.1:8501'
WGET_AT = find_executable(
'Wget+AT',
@ -60,25 +51,6 @@ WGET_AT = find_executable(
if not WGET_AT:
raise Exception('No usable Wget+At found.')
###########################################################################
# The version number of this pipeline definition.
#
# Update this each time you make a non-cosmetic change.
# It will be added to the WARC files and reported to the tracker.
VERSION = '20220423.01'
#USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.183 Safari/537.36'
TRACKER_ID = 'urls'
TRACKER_HOST = 'legacy-api.arpa.li'
MULTI_ITEM_SIZE = 40
MAX_DUPES_LIST_SIZE = 10000
###########################################################################
# This section defines project-specific tasks.
#
# Simple tasks (tasks that do not need any concurrency) are based on the
# SimpleTask class and have a process(item) method that is called for
# each item.
class CheckIP(SimpleTask):
def __init__(self):
SimpleTask.__init__(self, 'CheckIP')
@ -112,16 +84,6 @@ class CheckIP(SimpleTask):
self._counter -= 1
class CheckRequirements(SimpleTask):
def __init__(self):
SimpleTask.__init__(self, 'CheckRequirements')
self._checked = False
def process(self, item):
if not self._checked:
assert shutil.which('pdftohtml') is not None
self._checked = True
class PrepareDirectories(SimpleTask):
def __init__(self, warc_prefix):
@ -146,77 +108,8 @@ class PrepareDirectories(SimpleTask):
time.strftime('%Y%m%d-%H%M%S')
])
if not os.path.isfile('duplicate-urls.txt'):
open('duplicate-urls.txt', 'w').close()
open('%(item_dir)s/%(warc_file_base)s.warc.zst' % item, 'w').close()
open('%(item_dir)s/%(warc_file_base)s_bad-urls.txt' % item, 'w').close()
open('%(item_dir)s/%(warc_file_base)s_duplicate-urls.txt' % item, 'w').close()
class MoveFiles(SimpleTask):
def __init__(self):
SimpleTask.__init__(self, 'MoveFiles')
def process(self, item):
os.rename('%(item_dir)s/%(warc_file_base)s.warc.zst' % item,
'%(data_dir)s/%(warc_file_base)s.%(dict_project)s.%(dict_id)s.warc.zst' % item)
shutil.rmtree('%(item_dir)s' % item)
class SetBadUrls(SimpleTask):
def __init__(self):
SimpleTask.__init__(self, 'SetBadUrls')
def unquote_url(self, url):
temp = unquote(url)
while url != temp:
url = temp
temp = unquote(url)
return url
def process(self, item):
item['item_name_original'] = item['item_name']
items = item['item_name'].split('\0')
items_lower = [self.unquote_url(url).strip().lower() for url in item['item_urls']]
with open('%(item_dir)s/%(warc_file_base)s_bad-urls.txt' % item, 'r') as f:
for url in {
self.unquote_url(url).strip().lower() for url in f
}:
index = items_lower.index(url)
items.pop(index)
items_lower.pop(index)
item['item_name'] = '\0'.join(items)
class SetDuplicateUrls(SimpleTask):
def __init__(self):
SimpleTask.__init__(self, 'SetNewDuplicates')
def process(self, item):
with LOCK:
self._process(item)
def _process(self, item):
with open('duplicate-urls.txt', 'r') as f:
duplicates = {s.strip() for s in f}
with open('%(item_dir)s/%(warc_file_base)s_duplicate-urls.txt' % item, 'r') as f:
for url in f:
duplicates.add(url.strip())
with open('duplicate-urls.txt', 'w') as f:
# choose randomly, to cycle periodically popular URLs
duplicates = list(duplicates)
random.shuffle(duplicates)
f.write('\n'.join(duplicates[:MAX_DUPES_LIST_SIZE]))
class MaybeSendDoneToTracker(SendDoneToTracker):
def enqueue(self, item):
if len(item['item_name']) == 0:
return self.complete_item(item)
return super(MaybeSendDoneToTracker, self).enqueue(item)
open('%(item_dir)s/%(warc_file_base)s.warc.gz' % item, 'w').close()
open('%(item_dir)s/%(warc_file_base)s_retry-urls.txt' % item, 'w').close()
def get_hash(filename):
with open(filename, 'rb') as in_file:
@ -224,104 +117,65 @@ def get_hash(filename):
CWD = os.getcwd()
PIPELINE_SHA1 = get_hash(os.path.join(CWD, 'pipeline.py'))
LUA_SHA1 = get_hash(os.path.join(CWD, 'urls.lua'))
LUA_SHA1 = get_hash(os.path.join(CWD, 'grab.lua'))
GMD_LUA_SHA1 = get_hash(os.path.join(CWD, 'gmd.lua'))
def stats_id_function(item):
d = {
'pipeline_hash': PIPELINE_SHA1,
'lua_hash': LUA_SHA1,
'gmd_lua_hash': GMD_LUA_SHA1,
'python_version': sys.version,
}
return d
class MoveFiles(SimpleTask):
def __init__(self):
SimpleTask.__init__(self, 'MoveFiles')
class ZstdDict(object):
created = 0
data = None
def process(self, item):
os.rename('%(item_dir)s/%(warc_file_base)s.warc.gz' % item,
'%(data_dir)s/%(warc_file_base)s.warc.gz' % item)
@classmethod
def get_dict(cls):
if cls.data is not None and time.time() - cls.created < 1800:
return cls.data
response = requests.get(
'https://legacy-api.arpa.li/dictionary',
params={
'project': TRACKER_ID
}
)
response.raise_for_status()
response = response.json()
if cls.data is not None and response['id'] == cls.data['id']:
cls.created = time.time()
return cls.data
print('Downloading latest dictionary.')
response_dict = requests.get(response['url'])
response_dict.raise_for_status()
raw_data = response_dict.content
if hashlib.sha256(raw_data).hexdigest() != response['sha256']:
raise ValueError('Hash of downloaded dictionary does not match.')
if raw_data[:4] == b'\x28\xB5\x2F\xFD':
raw_data = zstandard.ZstdDecompressor().decompress(raw_data)
cls.data = {
'id': response['id'],
'dict': raw_data
}
cls.created = time.time()
return cls.data
shutil.rmtree('%(item_dir)s' % item)
class AwfulBackfeed(SimpleTask):
def __init__(self):
SimpleTask.__init__(self, 'AwfulBackfeed')
def process(self, item):
with open('%(item_dir)s/new_items' % item) as file:
new_items = file.read()
class WgetArgs(object):
def realize(self, item):
with open('user-agents.txt', 'r') as f:
USER_AGENT = random.choice(list(f)).strip()
wget_args = [
'timeout', '1000',
WGET_AT,
'-U', USER_AGENT,
'-v',
'--content-on-error',
'--lua-script', 'urls.lua',
'--lua-script', 'grab.lua',
'-o', ItemInterpolation('%(item_dir)s/wget.log'),
#'--no-check-certificate',
'--output-document', ItemInterpolation('%(item_dir)s/wget.tmp'),
'--truncate-output',
'-e', 'robots=off',
'--rotate-dns',
'--recursive', '--level=inf',
'--no-parent',
'--timeout', '10',
'--tries', '2',
'--tries', '10',
'--span-hosts',
'--page-requisites',
'--waitretry', '0',
'--waitretry', '5000',
'--warc-file', ItemInterpolation('%(item_dir)s/%(warc_file_base)s'),
'--warc-header', 'operator: Archive Team',
'--warc-header', 'operator: TheTechRobo <thetechrobo@protonmail.ch>',
'--warc-header', 'x-wget-at-project-version: ' + VERSION,
'--warc-header', 'x-wget-at-project-name: ' + TRACKER_ID,
'--warc-dedup-url-agnostic',
'--warc-compression-use-zstd',
'--warc-zstd-dict-no-include',
'--header', 'Contact: Discord TheTechRobo#7420',
'--header', 'Connection: keep-alive',
'--header', 'Accept-Language: en-US;q=0.9, en;q=0.8'
]
dict_data = ZstdDict.get_dict()
with open(os.path.join(item['item_dir'], 'zstdict'), 'wb') as f:
f.write(dict_data['dict'])
item['dict_id'] = dict_data['id']
item['dict_project'] = TRACKER_ID
wget_args.extend([
'--warc-zstd-dict', ItemInterpolation('%(item_dir)s/zstdict'),
])
item['item_name'] = '\0'.join([
item_name for item_name in item['item_name'].split('\0')
if (item_name.startswith('custom:') and '&url=' in item_name) \
or item_name.startswith('http://') \
or item_name.startswith('https://') \
])
item['item_name_newline'] = item['item_name'].replace('\0', '\n')
item_urls = []
custom_items = {}
@ -329,17 +183,8 @@ class WgetArgs(object):
for item_name in item['item_name'].split('\0'):
wget_args.extend(['--warc-header', 'x-wget-at-project-item-name: '+item_name])
wget_args.append('item-name://'+item_name)
if item_name.startswith('custom:'):
data = parse_qs(item_name.split(':', 1)[1])
for k, v in data.items():
if len(v) == 1:
data[k] = v[0]
url = data['url']
custom_items[url.lower()] = data
else:
url = item_name
item_urls.append(url)
wget_args.append(url)
item_urls.append(item_name)
wget_args.append(item_name)
item['item_urls'] = item_urls
item['custom_items'] = json.dumps(custom_items)
@ -353,73 +198,36 @@ class WgetArgs(object):
return realize(wget_args, item)
###########################################################################
# Initialize the project.
#
# This will be shown in the warrior management panel. The logo should not
# be too big. The deadline is optional.
project = Project(
title = 'URLs',
project_html = '''
<img class="project-logo" alt="logo" src="https://archiveteam.org/images/thumb/f/f3/Archive_team.png/235px-Archive_team.png" height="50px"/>
<h2>Archiving sets of discovered outlinks. &middot; <a href="http://tracker.archiveteam.org/urls/">Leaderboard</a></span></h2>
'''
)
pipeline = Pipeline(
CheckIP(),
CheckRequirements(),
GetItemFromTracker('https://{}/{}/multi={}/'
.format(TRACKER_HOST, TRACKER_ID, MULTI_ITEM_SIZE),
downloader, VERSION),
PrepareDirectories(warc_prefix='urls'),
WgetDownload(
WgetArgs(),
max_tries=1,
accept_on_exit_code=[0, 4, 8],
env={
'item_dir': ItemValue('item_dir'),
'item_name': ItemValue('item_name_newline'),
'custom_items': ItemValue('custom_items'),
'warc_file_base': ItemValue('warc_file_base')
}
),
SetBadUrls(),
SetDuplicateUrls(),
PrepareStatsForTracker(
defaults={'downloader': downloader, 'version': VERSION},
file_groups={
'data': [
ItemInterpolation('%(item_dir)s/%(warc_file_base)s.warc.zst')
]
},
id_function=stats_id_function,
),
MoveFiles(),
LimitConcurrent(NumberConfigValue(min=1, max=20, default='2',
name='shared:rsync_threads', title='Rsync threads',
description='The maximum number of concurrent uploads.'),
UploadWithTracker(
'https://%s/%s' % (TRACKER_HOST, TRACKER_ID),
downloader=downloader,
version=VERSION,
files=[
ItemInterpolation('%(data_dir)s/%(warc_file_base)s.%(dict_project)s.%(dict_id)s.warc.zst')
],
rsync_target_source_path=ItemInterpolation('%(data_dir)s/'),
rsync_extra_args=[
'--recursive',
'--partial',
'--partial-dir', '.rsync-tmp',
'--min-size', '1',
'--no-compress',
'--compress-level', '0'
]
CheckIP(),
GetItemFromTracker('http://{}/{}'
.format(TRACKER_HOST, TRACKER_ID),
downloader, VERSION),
PrepareDirectories(warc_prefix='gmd'),
WgetDownload(
WgetArgs(),
max_tries=1,
accept_on_exit_code=[0, 4, 8],
env={
'item_dir': ItemValue('item_dir'),
'item_name': ItemValue('item_name_newline'),
'custom_items': ItemValue('custom_items'),
'warc_file_base': ItemValue('warc_file_base')
}
),
),
MaybeSendDoneToTracker(
tracker_url='https://%s/%s' % (TRACKER_HOST, TRACKER_ID),
stats=ItemValue('stats')
)
)
CheckLandslide(),
PrepareStatsForTracker(
defaults={'downloader': downloader, 'version': VERSION},
file_groups={
'data': [
ItemInterpolation('%(item_dir)s/%(warc_file_base)s.warc.gz')
]
},
id_function=stats_id_function,
),
MoveFiles(),
SendDoneToTracker(
tracker_url='http://%s/%s' % (TRACKER_HOST, TRACKER_ID),
stats=ItemValue('stats')
)
)

942
urls.lua
View File

@ -1,942 +0,0 @@
-- Script setup: load dependencies, read the per-item environment variables
-- and build the set of URLs this item is responsible for.
local urlparse = require("socket.url")
local http = require("socket.http")
-- JSON is deliberately a global; it is loaded from JSON.lua in the working directory.
JSON = (loadfile "JSON.lua")()
-- Values read from the process environment.
local item_dir = os.getenv("item_dir")
local item_name = os.getenv("item_name")
local custom_items = os.getenv("custom_items")
local warc_file_base = os.getenv("warc_file_base")
local url_count = 0
local downloaded = {}
local abortgrab = false
local exit_url = false
local min_dedup_mb = 5
local timestamp = nil
-- Flag the grab for abort when either socket module is unavailable.
if urlparse == nil or http == nil then
io.stdout:write("socket not corrently installed.\n")
io.stdout:flush()
abortgrab = true
end
-- Set of lower-cased item URLs: one per line of item_name ...
local urls = {}
for url in string.gmatch(item_name, "([^\n]+)") do
urls[string.lower(url)] = true
end
-- ... plus every key of the JSON object passed in custom_items.
local urls_settings = JSON:decode(custom_items)
for k, _ in pairs(urls_settings) do
urls[string.lower(k)] = true
end
-- Per-response bookkeeping shared by the callbacks below.
local status_code = nil
local redirect_urls = {}
local visited_urls = {}

-- Set (pattern -> true) of Lua patterns that recognise ID-like URL parts.
local ids_to_ignore = {}

-- Hyphen-separated groups of hex digits: 8-4-4-4-12 and 8-4-4-12.
do
  local hex = "[0-9a-fA-F]"
  for _, lengths in pairs({{8, 4, 4, 4, 12}, {8, 4, 4, 12}}) do
    local pieces = {}
    for _, width in pairs(lengths) do
      pieces[#pieces + 1] = string.rep(hex, width)
      -- every group except the final 12-digit one is followed by a hyphen
      if width ~= 12 then
        pieces[#pieces + 1] = "%-"
      end
    end
    ids_to_ignore[table.concat(pieces)] = true
  end
end

-- Nine (or ten) decimal digits as a trailing query string, and ten digits
-- followed by a dot and four more digits at the end.
local to_ignore = string.rep("[0-9]", 9)
ids_to_ignore["%?" .. to_ignore .. "$"] = true
ids_to_ignore["%?" .. to_ignore .. "[0-9]$"] = true
ids_to_ignore[to_ignore .. "[0-9]%.[0-9][0-9][0-9][0-9]$"] = true

-- Fifty alphanumerics followed by a hyphen and five digits.
to_ignore = string.rep("[0-9a-zA-Z]", 50)
ids_to_ignore[to_ignore .. "%-[0-9][0-9][0-9][0-9][0-9]"] = true
ids_to_ignore["[0-9a-zA-Z%-_]!%-?[0-9]"] = true

-- A run of exactly 32 hex digits bounded by non-hex characters or the end.
to_ignore = string.rep("[0-9a-fA-F]", 32)
ids_to_ignore["[^0-9a-fA-F]" .. to_ignore .. "[^0-9a-fA-F]"] = true
ids_to_ignore["[^0-9a-fA-F]" .. to_ignore .. "$"] = true
-- Mutable crawl state shared by the helper functions and wget callbacks below.
local current_url = nil
local current_settings = nil
local bad_urls = {}
local queued_urls = {}
local bad_params = {}
local bad_patterns = {}
local ignore_patterns = {}
local page_requisite_patterns = {}
local duplicate_urls = {}
local extract_outlinks_patterns = {}
local item_first_url = nil
local redirect_domains = {}
local checked_domains = {}
local parenturl_uuid = nil
local parenturl_requisite = nil

-- Load the line-oriented configuration files from the working directory.
-- Each io.open is wrapped in assert (as read_file in this file already does)
-- so a missing file fails with the file name in the message instead of an
-- "attempt to index a nil value" error on the next line.

-- Set of URLs already known to be duplicates.
local dupes_file = assert(io.open("duplicate-urls.txt", "r"))
for url in dupes_file:lines() do
  duplicate_urls[url] = true
end
dupes_file:close()

-- Query-parameter names to strip; every letter is expanded to a
-- two-character class ("a" -> "[aA]") so the pattern is case-insensitive.
local bad_params_file = assert(io.open("bad-params.txt", "r"))
for param in bad_params_file:lines() do
  -- assigning to a single local drops gsub's second result (the match count)
  local insensitive = string.gsub(
    param, "([a-zA-Z])",
    function(c)
      return "[" .. string.lower(c) .. string.upper(c) .. "]"
    end
  )
  table.insert(bad_params, insensitive)
end
bad_params_file:close()

local bad_patterns_file = assert(io.open("bad-patterns.txt", "r"))
for pattern in bad_patterns_file:lines() do
  table.insert(bad_patterns, pattern)
end
bad_patterns_file:close()

local ignore_patterns_file = assert(io.open("ignore-patterns.txt", "r"))
for pattern in ignore_patterns_file:lines() do
  table.insert(ignore_patterns, pattern)
end
ignore_patterns_file:close()

local page_requisite_patterns_file = assert(io.open("page-requisite-patterns.txt", "r"))
for pattern in page_requisite_patterns_file:lines() do
  table.insert(page_requisite_patterns, pattern)
end
page_requisite_patterns_file:close()

-- Stored as a set (line -> true): check_domain_outlinks looks entries up by key.
local extract_outlinks_patterns_file = assert(io.open("extract-outlinks-patterns.txt", "r"))
for pattern in extract_outlinks_patterns_file:lines() do
  extract_outlinks_patterns[pattern] = true
end
extract_outlinks_patterns_file:close()
--- Read from a file and return what was read.
-- @param file  path to open; a nil/false path yields ""
-- @param bytes read format or byte count passed to file:read; defaults to "*all"
-- @return the data read, or "" when nothing could be read
-- Raises (via assert) when the file cannot be opened.
read_file = function(file, bytes)
  if not file then
    return ""
  end
  local handle = assert(io.open(file))
  local contents = handle:read(bytes or "*all")
  handle:close()
  return contents or ""
end
--- Count every entry of a table (array and hash part alike).
-- @param t table to inspect
-- @return number of key/value pairs visited by pairs(t)
table_length = function(t)
  local total = 0
  for _, _ in pairs(t) do total = total + 1 end
  return total
end
--- Walk a URL's host name from most to least specific looking for a match.
-- The host is tested first, then the host with its left-most label removed,
-- and so on. With `target` set, a candidate matches when it equals `target`;
-- without it, a candidate matches when it is a key of extract_outlinks_patterns.
-- @param url    URL beginning with http:// or https://
-- @param target optional domain to compare against
-- @return the matching domain string, or false when nothing matched
check_domain_outlinks = function(url, target)
  local host = string.match(url, "^https?://([^/]+)")
  while host do
    local hit
    if target then
      hit = (host == target)
    else
      hit = extract_outlinks_patterns[host]
    end
    if hit then
      return host
    end
    -- drop the left-most label: "a.b.c" -> "b.c"
    host = string.match(host, "^[^%.]+%.(.+)$")
  end
  return false
end
do
  -- HTTP status codes that are NOT considered bad.
  local acceptable = {
    [200] = true,
    [301] = true,
    [302] = true,
    [303] = true,
    [307] = true,
    [308] = true,
    [404] = true,
    [410] = true,
  }

  --- Tell whether an HTTP status code counts as a failure.
  -- @param status_code numeric status code
  -- @return true unless the code is one of 200, 301, 302, 303, 307, 308, 404, 410
  bad_code = function(status_code)
    return not acceptable[status_code]
  end
end
--- Detect a URL in which some "/"-separated segment occurs repeatedly.
-- The URL is unescaped first and segments are compared case-insensitively.
-- @param url             URL to inspect
-- @param max_repetitions occurrence count at which a segment is reported
-- @return true as soon as any segment reaches max_repetitions, else false
find_path_loop = function(url, max_repetitions)
  local seen = {}
  for segment in string.gmatch(urlparse.unescape(url), "([^/]+)") do
    local key = string.lower(segment)
    local count = seen[key]
    if not count then
      -- an empty segment starts at -2 instead of 0
      count = (key == "") and -2 or 0
    end
    count = count + 1
    seen[key] = count
    if count == max_repetitions then
      return true
    end
  end
  return false
end
--- Percent-encode every byte outside printable ASCII.
-- Bytes below 32 or above 126 become "%XX" (upper-case hex); all other bytes,
-- including existing "%" signs, are left untouched.
-- @param url string to encode
-- @return the encoded string
percent_encode_url = function(url)
  -- A single gsub pass replaces the old per-character concatenation, which
  -- was quadratic and leaked its accumulator into the global `temp`.
  local encoded = string.gsub(url, ".", function(c)
    local b = string.byte(c)
    if b < 32 or b > 126 then
      return string.format("%%%02X", b)
    end
    -- returning nothing keeps the original character
  end)
  return encoded
end
--- Normalise a discovered URL and record it in queued_urls.
-- Variants of the URL are queued first through queue_new_urls. The URL is then
-- cleaned (removes `' + '` concatenation artefacts, percent-encodes
-- non-printable bytes, cuts at the first "{", "<" or "\"). When `withcustom`
-- is set and the current item has an "all" setting, it is rewritten into a
-- "custom:k=v&..." item string carrying decremented depth settings.
-- @param url        URL to queue; nil/false is ignored
-- @param withcustom when truthy, propagate the current item's custom settings
-- @return nil for ignored input, false when the final URL has a repeated
--         path segment, nothing otherwise
queue_url = function(url, withcustom)
  if not url then
    return nil
  end
  queue_new_urls(url)
  if not string.match(url, "^https?://[^/]+%.") then
    return nil
  end
  --local original = url
  -- Numeric setting minus one; a missing or non-numeric setting gives -1.
  -- Both this helper and its scratch variable used to leak as globals.
  local function load_setting_depth(s)
    local n = tonumber(current_settings[s])
    if n == nil then
      n = 0
    end
    return n - 1
  end
  url = string.gsub(url, "'%s*%+%s*'", "")
  url = percent_encode_url(url)
  url = string.match(url, "^([^{]+)")
  url = string.match(url, "^([^<]+)")
  url = string.match(url, "^([^\\]+)")
  if current_settings and current_settings["all"] and withcustom then
    local depth = load_setting_depth("depth")
    local keep_random = load_setting_depth("keep_random")
    local keep_all = load_setting_depth("keep_all")
    local any_domain = load_setting_depth("any_domain")
    if depth >= 0 then
      local random = current_settings["random"]
      local all = current_settings["all"]
      -- settings whose counter has run out are dropped from the child item
      if keep_random < 0 or random == "" then
        random = nil
        keep_random = nil
      end
      if keep_all < 0 or all == 0 then
        all = nil
        keep_all = nil
      end
      if any_domain <= 0 then
        any_domain = nil
      end
      local settings = {
        depth=depth,
        all=all,
        keep_all=keep_all,
        random=random,
        keep_random=keep_random,
        url=url,
        any_domain=any_domain
      }
      url = "custom:"
      -- fixed key order keeps the generated item name deterministic
      for _, k in pairs(
        {"all", "any_domain", "depth", "keep_all", "keep_random", "random", "url"}
      ) do
        local v = settings[k]
        if v ~= nil then
          url = url .. k .. "=" .. urlparse.escape(tostring(v)) .. "&"
        end
      end
      -- drop the trailing "&"
      url = string.sub(url, 1, -2)
    end
  end
  if not duplicate_urls[url] and not queued_urls[url] then
    if find_path_loop(url, 2) then
      return false
    end
    --print("queuing",original, url)
    queued_urls[url] = true
  end
end
--- Queue a URL as a "custom:" item tagged with a year-month value.
-- The tag is os.date("%Y%m", timestamp); the URL is percent-encoded and then
-- escaped before being embedded in the item string.
-- @param url URL to queue
queue_monthly_url = function(url)
  local month = os.date("%Y%m", timestamp)
  local encoded = urlparse.escape(tostring(percent_encode_url(url)))
  queued_urls["custom:random=" .. month .. "&url=" .. encoded] = true
end
--- Strip every occurrence of a query parameter from a URL.
-- The removal is repeated until the URL stops changing, then one trailing
-- "?", "&" or ";" is dropped.
-- @param url           URL to clean
-- @param param_pattern Lua pattern matching the parameter name
-- @return the URL without the parameter
remove_param = function(url, param_pattern)
  local strip = "([%?&;])" .. param_pattern .. "=[^%?&;]*[%?&;]?"
  local previous
  local current = url
  while current ~= previous do
    previous = current
    -- single-target assignment keeps only gsub's first result
    current = string.gsub(previous, strip, "%1")
  end
  return string.match(current, "^(.-)[%?&;]?$")
end
--- Queue cleaned-up variants of a URL through queue_url.
-- Variants tried, each queued only when it differs from the input:
--   1. "amp;" removed after "?", "&" or ";" (only when url is the current URL),
--   2. every pattern in bad_params stripped via remove_param,
--   3. everything from the first "?" or "&" onwards cut off,
--   4. each piece between '"' or "\" characters after decoding &quot; / &amp;.
-- @param url URL to derive variants from; nil/false is ignored
-- @return nil
queue_new_urls = function(url)
if not url then
return nil
end
local newurl = string.gsub(url, "([%?&;])[aA][mM][pP];", "%1")
if url == current_url then
if newurl ~= url then
queue_url(newurl)
end
end
-- strip every configured tracking/session parameter
for _, param_pattern in pairs(bad_params) do
newurl = remove_param(newurl, param_pattern)
end
if newurl ~= url then
queue_url(newurl)
end
-- keep only the part before the first "?" or "&"; may be nil, which queue_url ignores
newurl = string.match(newurl, "^([^%?&]+)")
if newurl ~= url then
queue_url(newurl)
end
url = string.gsub(url, "&quot;", '"')
url = string.gsub(url, "&amp;", "&")
-- this loop variable shadows the outer `newurl`
for newurl in string.gmatch(url, '([^"\\]+)') do
if newurl ~= url then
queue_url(newurl)
end
end
end
--- Mark a URL as bad in bad_urls.
-- The current item URL is recorded when one is set; otherwise the given URL
-- is recorded in lower case.
-- @param url URL used as a fallback when current_url is nil
report_bad_url = function(url)
  local key = current_url
  if key == nil then
    key = string.lower(url)
  end
  bad_urls[key] = true
end
--- Remove the scheme and a leading "www." from a URL.
-- @param url URL starting with http:// or https://; any other input raises an
--            error, because the scheme match yields nil
-- @return the remainder of the URL
strip_url = function(url)
  url = string.match(url, "^https?://(.+)$")
  -- was an accidental global; keep the scratch value local
  local newurl = string.match(url, "^www%.(.+)$")
  if newurl then
    url = newurl
  end
  return url
end
--- Wget callback consulted for each link found in a downloaded document.
-- In this implementation the only path that returns true is a parent URL
-- recorded in redirect_urls. Every other path either hands the link to
-- queue_url and returns false, returns false outright, or falls off the end
-- and returns nil.
-- NOTE(review): presumably a true return tells Wget to fetch the link
-- itself -- confirm against the wget-lua documentation.
-- @param urlpos  link description; reads urlpos["url"]["url"],
--                urlpos["url"]["host"], urlpos["link_refresh_p"] and
--                urlpos["link_inline_p"]
-- @param parent  parent document description; reads parent["url"] and parent["host"]
-- @param verdict Wget's own decision for this link
-- (depth, start_url_parsed, iri and reason are not used here)
wget.callbacks.download_child_p = function(urlpos, parent, depth, start_url_parsed, iri, verdict, reason)
local url = urlpos["url"]["url"]
local parenturl = parent["url"]
local extract_page_requisites = false
local current_settings_all = current_settings and current_settings["all"]
local current_settings_any_domain = current_settings and current_settings["any_domain"]
--queue_monthly_url(string.match(url, "^(https?://[^/]+)") .. "/")
-- Links found on a redirect are accepted, except for a 300 response from feb-web.ru.
if redirect_urls[parenturl] and not (
status_code == 300 and string.match(parenturl, "^https?://[^/]*feb%-web%.ru/")
) then
return true
end
-- Reject URLs in which any path segment appears twice.
if find_path_loop(url, 2) then
return false
end
-- Reject URLs with 16 or more "/" and "?" characters in total.
local _, count = string.gsub(url, "[/%?]", "")
if count >= 16 then
return false
end
-- Document-like extensions: nothing is followed from such a parent, and such
-- a child is queued as its own item instead of being fetched here.
for _, extension in pairs({
"pdf",
"doc[mx]?",
"xls[mx]?",
"ppt[mx]?",
"zip",
"odt",
"odm",
"ods",
"odp",
"xml",
"json",
"torrent"
}) do
if string.match(parenturl, "%." .. extension .. "$")
or string.match(parenturl, "%." .. extension .. "[^a-z0-9A-Z]")
or string.match(parenturl, "%." .. string.upper(extension) .. "$")
or string.match(parenturl, "%." .. string.upper(extension) .. "[^a-z0-9A-Z]") then
return false
end
if string.match(url, "%." .. extension .. "$")
or string.match(url, "%." .. extension .. "[^a-z0-9A-Z]")
or string.match(url, "%." .. string.upper(extension) .. "$")
or string.match(url, "%." .. string.upper(extension) .. "[^a-z0-9A-Z]") then
queue_url(url)
return false
end
end
-- Look up (and cache per first item URL) whether the item's domain is listed
-- in extract_outlinks_patterns; "none" caches a negative result.
local domain_match = checked_domains[item_first_url]
if not domain_match then
domain_match = check_domain_outlinks(item_first_url)
if not domain_match then
domain_match = "none"
end
checked_domains[item_first_url] = domain_match
end
if domain_match ~= "none" then
extract_page_requisites = true
-- NOTE(review): newurl_domain is not used in the visible code.
local newurl_domain = string.match(url, "^https?://([^/]+)")
local to_queue = true
-- Links pointing into any domain in redirect_domains are not queued here.
for domain, _ in pairs(redirect_domains) do
if check_domain_outlinks(url, domain) then
to_queue = false
break
end
end
if to_queue then
queue_url(url)
return false
end
end
--[[if not extract_page_requisites then
return false
end]]
-- Without an "all" setting, only continue for 2xx responses that Wget itself accepted.
if (status_code < 200 or status_code >= 300 or not verdict)
and not current_settings_all then
return false
end
--[[if string.len(url) == string.len(parenturl) then
local good_url = false
local index1, index2
temp_url = string.match(url, "^https?://(.+)$")
temp_parenturl = string.match(parenturl, "^https?://(.+)$")
local start_index = 1
repeat
index1 = string.find(temp_url, "/", start_index)
index2 = string.find(temp_parenturl, "/", start_index)
if index1 ~= index2 then
good_url = true
break
end
if index1 then
start_index = index1 + 1
end
until not index1 or not index2
if not good_url then
return false
end
end]]
-- Computed once and cached in parenturl_uuid: does any visited URL match
-- one of the ids_to_ignore patterns?
if parenturl_uuid == nil then
parenturl_uuid = false
for old_parent_url, _ in pairs(visited_urls) do
for id_to_ignore, _ in pairs(ids_to_ignore) do
if string.match(old_parent_url, id_to_ignore) then
parenturl_uuid = true
break
end
end
if parenturl_uuid then
break
end
end
end
-- If so, links that themselves match such a pattern are dropped (unless "all" is set).
if parenturl_uuid then
for id_to_ignore, _ in pairs(ids_to_ignore) do
if string.match(url, id_to_ignore) and not current_settings_all then
return false
end
end
end
-- Links flagged link_refresh_p are queued as separate items.
if urlpos["link_refresh_p"] ~= 0 then
queue_url(url)
return false
end
-- Computed once and cached in parenturl_requisite: does any visited URL
-- match one of page_requisite_patterns?
if parenturl_requisite == nil then
parenturl_requisite = false
for _, pattern in pairs(page_requisite_patterns) do
for old_parent_url, _ in pairs(visited_urls) do
if string.match(old_parent_url, pattern) then
parenturl_requisite = true
break
end
end
if parenturl_requisite then
break
end
end
end
if parenturl_requisite and not current_settings_all then
return false
end
-- Links flagged link_inline_p are queued as separate items.
if urlpos["link_inline_p"] ~= 0 then
queue_url(url)
return false
end
-- Compare the last two host labels of the link with those of the parent host
-- (or of current_url when one is set).
local current_host = string.match(urlpos["url"]["host"], "([^%.]+%.[^%.]+)$")
local first_parent_host = string.match(parent["host"], "([^%.]+%.[^%.]+)$")
if current_url then
first_parent_host = string.match(current_url .. "/", "^https?://[^/]-([^/%.]+%.[^/%.]+)/")
end
-- With "all" set, queue the link together with the custom settings when it
-- stays on the same host pair or any_domain is enabled.
if current_settings_all and (
current_settings_any_domain
or first_parent_host == current_host
) then
queue_url(url, true)
return false
end
--[[for old_parent_url, _ in pairs(visited_urls) do
for _, pattern in pairs(page_requisite_patterns) do
if string.match(old_parent_url, pattern) then
return false
end
end
end
for _, pattern in pairs(page_requisite_patterns) do
if string.match(url, pattern) then
queue_url(url)
return false
end
end]]
-- No explicit return: control falls through and the callback yields nil.
end
-- Scans a downloaded body for links and passes every candidate to queue_url.
--
-- file:   path of the file holding the body to scan.
-- url:    URL the body belongs to; nil when this callback re-enters itself
--         on the HTML that pdftohtml generated from a PDF.
-- is_css, iri: accepted for the callback signature but not used here.
--
-- Returns nothing. Side effects: marks `url` in `downloaded`, queues URLs via
-- queue_url, and may run pdftohtml and create/remove "<file>-html.html".
wget.callbacks.get_urls = function(file, url, is_css, iri)
  local html = nil
  if url then
    downloaded[url] = true
  end

  -- Turns the numeric value of a character reference into a single byte.
  -- string.char raises an error for values outside 0..255, so anything out of
  -- range returns nil, which makes string.gsub keep the entity text unchanged.
  local function entity_byte(code)
    if code and code >= 0 and code <= 255 then
      return string.char(code)
    end
    return nil
  end

  -- Drops the fragment, un-escapes "&amp;" and queues the URL.
  -- `headers` is accepted but never used.
  local function check(url, headers)
    local url = string.match(url, "^([^#]+)")
    url = string.gsub(url, "&amp;", "&")
    queue_url(url)
  end

  -- Normalises an absolute or path-like reference (escaped slashes, extra
  -- slashes, "../", "./") and forwards it to check. References that need a
  -- base are ignored when `url` is nil.
  local function checknewurl(newurl, headers)
    if string.match(newurl, "^#") then
      return nil
    end
    if string.match(newurl, "\\[uU]002[fF]") then
      return checknewurl(string.gsub(newurl, "\\[uU]002[fF]", "/"), headers)
    end
    if string.match(newurl, "^https?:////") then
      check(string.gsub(newurl, ":////", "://"), headers)
    elseif string.match(newurl, "^https?://") then
      check(newurl, headers)
    elseif string.match(newurl, "^https?:\\/\\?/") then
      check(string.gsub(newurl, "\\", ""), headers)
    elseif not url then
      return nil
    elseif string.match(newurl, "^\\/") then
      checknewurl(string.gsub(newurl, "\\", ""), headers)
    elseif string.match(newurl, "^//") then
      check(urlparse.absolute(url, newurl), headers)
    elseif string.match(newurl, "^/") then
      check(urlparse.absolute(url, newurl), headers)
    elseif string.match(newurl, "^%.%./") then
      if string.match(url, "^https?://[^/]+/[^/]+/") then
        check(urlparse.absolute(url, newurl), headers)
      else
        -- A bare "../" has nothing after the slash, so the capture is nil;
        -- recursing with nil would raise inside string.match.
        local remainder = string.match(newurl, "^%.%.(/.+)$")
        if remainder then
          checknewurl(remainder, headers)
        end
      end
    elseif string.match(newurl, "^%./") then
      check(urlparse.absolute(url, newurl), headers)
    end
  end

  -- Handles attribute-style references: query-only and plain relative values
  -- are resolved against `url`; everything else goes through checknewurl.
  local function checknewshorturl(newurl, headers)
    if string.match(newurl, "^#") then
      return nil
    end
    if url and string.match(newurl, "^%?") then
      check(urlparse.absolute(url, newurl), headers)
    elseif url and not (string.match(newurl, "^https?:\\?/\\?//?/?")
      or string.match(newurl, "^[/\\]")
      or string.match(newurl, "^%./")
      or string.match(newurl, "^[jJ]ava[sS]cript:")
      or string.match(newurl, "^[mM]ail[tT]o:")
      or string.match(newurl, "^vine:")
      or string.match(newurl, "^android%-app:")
      or string.match(newurl, "^ios%-app:")
      or string.match(newurl, "^%${")) then
      check(urlparse.absolute(url, newurl), headers)
    else
      checknewurl(newurl, headers)
    end
  end

  if (status_code == 200 and current_settings and current_settings["deep_extract"])
    or not url then
    html = read_file(file)
    if not url then
      -- No base URL: decode entities first, then pull bare http(s) URLs out
      -- of the text.
      html = string.gsub(html, "&#160;", " ")
      html = string.gsub(html, "&lt;", "<")
      html = string.gsub(html, "&gt;", ">")
      html = string.gsub(html, "&quot;", '"')
      html = string.gsub(html, "&apos;", "'")
      html = string.gsub(html, "&#(%d+);",
        function(n)
          return entity_byte(tonumber(n))
        end
      )
      -- Hex references carry hex digits (%x), not only decimal ones.
      html = string.gsub(html, "&#[xX](%x+);",
        function(n)
          return entity_byte(tonumber(n, 16))
        end
      )
      local temp_html = string.gsub(html, "\n", "")
      -- The "" entry scans the text once before any tag is stripped; the
      -- later entries strip cumulatively and rescan.
      for _, remove in pairs({"", "<br/>", "</?p[^>]*>"}) do
        if remove ~= "" then
          temp_html = string.gsub(temp_html, remove, "")
        end
        for newurl in string.gmatch(temp_html, "(https?://[^%s<>#\"'\\`{})%]]+)") do
          -- Trim trailing punctuation that is most likely sentence text.
          while string.match(newurl, "[%.&,!;]$") do
            newurl = string.match(newurl, "^(.+).$")
          end
          check(newurl)
        end
      end
    end
    for newurl in string.gmatch(html, "[^%-][hH][rR][eE][fF]='([^']+)'") do
      checknewshorturl(newurl)
    end
    for newurl in string.gmatch(html, '[^%-][hH][rR][eE][fF]="([^"]+)"') do
      checknewshorturl(newurl)
    end
    for newurl in string.gmatch(string.gsub(html, "&[qQ][uU][oO][tT];", '"'), '"(https?://[^"]+)') do
      checknewurl(newurl)
    end
    for newurl in string.gmatch(string.gsub(html, "&#039;", "'"), "'(https?://[^']+)") do
      checknewurl(newurl)
    end
    if url then
      for newurl in string.gmatch(html, ">%s*([^<%s]+)") do
        checknewurl(newurl)
      end
    end
    --[[for newurl in string.gmatch(html, "%(([^%)]+)%)") do
      checknewurl(newurl)
    end]]
  elseif string.match(url, "^https?://[^/]+/.*[^a-z0-9A-Z][pP][dD][fF]$")
    or string.match(url, "^https?://[^/]+/.*[^a-z0-9A-Z][pP][dD][fF][^a-z0-9A-Z]")
    or string.match(read_file(file, 4), "%%[pP][dD][fF]") then
    io.stdout:write("Extracting links from PDF.\n")
    io.stdout:flush()
    local temp_file = file .. "-html.html"
    -- Remove a stale conversion result so its presence afterwards proves
    -- that pdftohtml produced output for this file.
    local check_file = io.open(temp_file)
    if check_file then
      check_file:close()
      os.remove(temp_file)
    end
    -- NOTE(review): `file` is concatenated into the shell command unquoted;
    -- confirm the caller only ever passes shell-safe paths.
    os.execute("pdftohtml -nodrm -hidden -i -s -q " .. file)
    check_file = io.open(temp_file)
    if check_file then
      check_file:close()
      local temp_length = table_length(queued_urls)
      -- Re-enter with url == nil so the generated HTML is scanned as text.
      wget.callbacks.get_urls(temp_file, nil, nil, nil)
      io.stdout:write("Found " .. tostring(table_length(queued_urls)-temp_length) .. " URLs.\n")
      io.stdout:flush()
      os.remove(temp_file)
    else
      io.stdout:write("Not a PDF.\n")
      io.stdout:flush()
    end
  end
end
-- Decides, per response, whether the callback answers true or false.
-- NOTE(review): the name suggests the boolean tells wget-lua whether to write
-- the record to the WARC -- confirm against the wget-lua documentation.
--
-- url:       table whose "url" field is the requested URL.
-- http_stat: table with "statcode", "newloc" and "len" fields.
--
-- Returns false when the URL is re-queued instead of kept, when the status is
-- a bad code, or when a redirect points at a known block/login/consent page;
-- true otherwise.
wget.callbacks.write_to_warc = function(url, http_stat)
  local url_lower = string.lower(url["url"])
  if urls[url_lower] then
    current_url = url_lower
    current_settings = urls_settings[url_lower]
  end
  if current_settings and not current_settings["random"] then
    queue_url(url["url"])
    return false
  end
  if bad_code(http_stat["statcode"]) then
    return false
  elseif http_stat["statcode"] >= 300 and http_stat["statcode"] <= 399 then
    local newloc = urlparse.absolute(url["url"], http_stat["newloc"])
    if string.match(newloc, "^https?://[^/]*google%.com/sorry")
      or string.match(newloc, "^https?://[^/]*google%.com/[sS]ervice[lL]ogin")
      or string.match(newloc, "^https?://consent%.youtube%.com/")
      or string.match(newloc, "^https?://consent%.google%.com/")
      or string.match(newloc, "^https?://misuse%.ncbi%.nlm%.nih%.gov/")
      or string.match(newloc, "^https?://myprivacy%.dpgmedia%.nl/")
      or string.match(newloc, "^https?://idp%.springer%.com/authorize%?")
      or string.match(newloc, "^https?://[^/]*instagram%.com/accounts/") then
      report_bad_url(url["url"])
      -- Read by httploop_result, which then returns wget.actions.EXIT.
      exit_url = true
      return false
    end
    return true
  elseif http_stat["statcode"] ~= 200 then
    return true
  end
  -- The Wayback Machine deduplication below is switched off by this
  -- unconditional return. The gate is kept as found; the code behind it is
  -- only made safe to re-enable.
  if true then
    return true
  end
  if http_stat["len"] > min_dedup_mb * 1024 * 1024 then
    io.stdout:write("Data larger than " .. tostring(min_dedup_mb) .. " MB. Checking with Wayback Machine.\n")
    io.stdout:flush()
    -- Retries every 10 seconds until the Wayback Machine answers with 200.
    while true do
      local body, code = http.request(
        "https://web.archive.org/__wb/calendarcaptures/2"
        .. "?url=" .. urlparse.escape(url["url"])
        .. "&date=202"
      )
      if code ~= 200 then
        io.stdout:write("Got " .. tostring(code) .. " from the Wayback Machine.\n")
        io.stdout:flush()
        os.execute("sleep 10")
      else
        -- Kept local so the decoded response does not leak into globals.
        local data = JSON:decode(body)
        if not data or not data["items"] or not data["colls"] then
          return true
        end
        for _, item in pairs(data["items"]) do
          if item[2] == 200 then
            local collections = nil
            if item[3] == nil then
              -- Checked before the arithmetic: nil + 1 would raise.
              io.stdout:write("Could not get coll ID.\n")
              io.stdout:flush()
            else
              -- The index is shifted by one for Lua's 1-based tables.
              collections = data["colls"][item[3] + 1]
              if not collections then
                io.stdout:write("Could not get collections.\n")
                io.stdout:flush()
              end
            end
            if collections then
              for _, collection in pairs(collections) do
                if collection == "archivebot"
                  or string.find(collection, "archiveteam") then
                  io.stdout:write("Archive Team got this URL before.\n")
                  io.stdout:flush()
                  return false
                end
              end
            end
          end
        end
        break
      end
    end
  end
  return true
end
-- Runs once per finished HTTP request: refreshes the shared per-request
-- state, logs a progress line, tracks redirects and visited URLs, and returns
-- the action to take next.
--
-- url:       table whose "url" field is the URL just requested.
-- err:       error description; concatenated into the bad-status log line.
-- http_stat: table with "statcode" and "newloc" fields.
--
-- Returns wget.actions.EXIT or wget.actions.NOTHING.
wget.callbacks.httploop_result = function(url, err, http_stat)
  -- Strips the scheme and an optional leading "www." so two URLs can be
  -- compared regardless of those parts.
  local function without_www(address)
    return string.match(address, "^https?://www%.(.+)")
      or string.match(address, "^https?://(.+)")
  end

  status_code = http_stat["statcode"]
  parenturl_uuid = nil
  parenturl_requisite = nil

  local this_url = url["url"]
  local lowered = string.lower(this_url)
  if urls[lowered] then
    current_url = lowered
    current_settings = urls_settings[lowered]
  end

  -- The timestamp is fetched once and then kept in the global.
  if not timestamp then
    local body, code = http.request("https://legacy-api.arpa.li/now")
    assert(code == 200)
    timestamp = tonumber(string.match(body, "^([0-9]+)"))
  end

  -- For https sites that produced a status, also queue the usual site-level
  -- resources through queue_monthly_url.
  if status_code ~= 0 then
    local base_url = string.match(this_url, "^(https://[^/]+)")
    if base_url then
      local suffixes = {"/robots.txt", "/favicon.ico", "/"}
      for i = 1, #suffixes do
        queue_monthly_url(base_url .. suffixes[i])
      end
    end
  end

  url_count = url_count + 1
  local progress_line = url_count .. "=" .. status_code .. " " .. this_url .. " \n"
  io.stdout:write(progress_line)
  io.stdout:flush()

  -- "done" was set by the previous non-redirect response, so this request
  -- starts a fresh redirect chain.
  if redirect_domains["done"] then
    redirect_domains = {}
    redirect_urls = {}
    visited_urls = {}
    item_first_url = nil
  end
  local host = string.match(this_url, "^https?://([^/]+)")
  redirect_domains[host] = true
  item_first_url = item_first_url or this_url
  visited_urls[this_url] = true

  -- Set by write_to_warc when a redirect led to a blocked/login page.
  if exit_url then
    exit_url = false
    return wget.actions.EXIT
  end

  local is_redirect = status_code >= 300 and status_code <= 399
  if not is_redirect then
    redirect_domains["done"] = true
  else
    local newloc = urlparse.absolute(this_url, http_stat["newloc"])
    redirect_urls[this_url] = true
    --[[if strip_url(url["url"]) == strip_url(newloc) then
      queued_urls[newloc] = true
      return wget.actions.EXIT
    end]]
    if downloaded[newloc] then
      return wget.actions.EXIT
    end
    -- Queue the target instead of following it for telegram download links,
    -- redirects that only change scheme/"www.", and 301/308 responses.
    if string.match(this_url, "^https?://[^/]*telegram%.org/dl%?tme=")
      or without_www(newloc) == without_www(this_url)
      or status_code == 301
      or status_code == 308 then
      queue_url(newloc)
      return wget.actions.EXIT
    end
  end

  -- Reaching an already-downloaded URL again is reported as a bad URL.
  if downloaded[this_url] then
    report_bad_url(this_url)
    return wget.actions.EXIT
  end

  for _, pattern in pairs(ignore_patterns) do
    if string.match(this_url, pattern) then
      return wget.actions.EXIT
    end
  end

  if status_code >= 200 and status_code <= 399 then
    downloaded[this_url] = true
  end
  if status_code >= 200 and status_code < 300 then
    queue_new_urls(this_url)
  end

  if bad_code(status_code) then
    io.stdout:write("Server returned " .. http_stat.statcode .. " (" .. err .. ").\n")
    io.stdout:flush()
    report_bad_url(this_url)
    return wget.actions.EXIT
  end

  -- The delay is fixed at 0, so the sleep below never runs as written.
  local sleep_time = 0
  if sleep_time > 0.001 then
    os.execute("sleep " .. sleep_time)
  end

  return wget.actions.NOTHING
end
-- End-of-run callback: filters the queued URLs, submits the survivors to the
-- backfeed endpoint in batches of 100, and writes two report files next to
-- the WARC ("<warc_file_base>_duplicate-urls.txt" and "_bad-urls.txt").
--
-- None of the callback arguments are used.
-- Sets the global `abortgrab` when a batch cannot be submitted after all
-- retries. Raises an error if either report file cannot be opened.
wget.callbacks.finish = function(start_time, end_time, wall_time, numurls, total_downloaded_bytes, total_download_time)
  -- POSTs one NUL-separated batch, retrying up to `maxtries` times with an
  -- exponential back-off of 1, 2, 4, 8 seconds.
  local function submit_backfeed(newurls)
    local tries = 0
    local maxtries = 4
    while tries < maxtries do
      local body, code = http.request(
        "https://legacy-api.arpa.li/backfeed/legacy/urls-glx7ansh4e17aii",
        newurls .. "\0"
      )
      print(body)
      if code == 200 then
        io.stdout:write("Submitted discovered URLs.\n")
        io.stdout:flush()
        break
      end
      io.stdout:write("Failed to submit discovered URLs." .. tostring(code) .. tostring(body) .. "\n")
      io.stdout:flush()
      -- `^` instead of math.pow, which is not available in newer Lua versions.
      os.execute("sleep " .. math.floor(2 ^ tries))
      tries = tries + 1
    end
    -- Only reached with tries == maxtries when every attempt failed.
    if tries == maxtries then
      abortgrab = true
    end
  end

  local batch_size = 100
  local batch = {}
  -- assert turns a failed open into a clear error message instead of a later
  -- "attempt to index a nil value".
  local dup_urls = assert(io.open(item_dir .. "/" .. warc_file_base .. "_duplicate-urls.txt", "w"))
  for url, _ in pairs(queued_urls) do
    local is_bad = false
    for _, pattern in pairs(bad_patterns) do
      is_bad = string.match(url, pattern)
      if is_bad then
        io.stdout:write("Filtering out URL " .. url .. ".\n")
        io.stdout:flush()
        break
      end
    end
    if not is_bad then
      io.stdout:write("Queuing URL " .. url .. ".\n")
      io.stdout:flush()
      dup_urls:write(url .. "\n")
      batch[#batch + 1] = url
      if #batch == batch_size then
        submit_backfeed(table.concat(batch, "\0"))
        batch = {}
      end
    end
  end
  -- Flush whatever is left over after the last full batch.
  if #batch > 0 then
    submit_backfeed(table.concat(batch, "\0"))
  end
  dup_urls:close()

  local file = assert(io.open(item_dir .. "/" .. warc_file_base .. "_bad-urls.txt", "w"))
  for url, _ in pairs(bad_urls) do
    file:write(url .. "\n")
  end
  file:close()
end
-- Chooses the exit status: the incoming status is passed through unchanged
-- unless the global `abortgrab` flag is set, in which case
-- wget.exits.IO_FAIL is returned. `exit_status_string` is not used.
wget.callbacks.before_exit = function(exit_status, exit_status_string)
  if not abortgrab then
    return exit_status
  end
  return wget.exits.IO_FAIL
end

View File

@ -1,381 +0,0 @@
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:40.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:43.0) Gecko/20100101 Firefox/43.0 SeaMonkey/2.40
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:45.0) Gecko/20100101 Firefox/45.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:47.0) Gecko/20100101 Firefox/47.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:48.0) Gecko/20100101 Firefox/48.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:49.0) Gecko/20100101 Firefox/49.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:50.0) Gecko/20100101 Firefox/50.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:52.0) Gecko/20100101 Firefox/52.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:54.0) Gecko/20100101 Firefox/54.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:56.0) Gecko/20100101 Firefox/56.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:56.0) Gecko/20100101 Firefox/56.0.4 Waterfox/56.0.4
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.3
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.4
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:58.0) Gecko/20100101 Firefox/58.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:59.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:61.0) Gecko/20100101 Firefox/61.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:64.0) Gecko/20100101 Firefox/64.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:65.0) Gecko/20100101 Firefox/65.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:43.0) Gecko/20100101 Firefox/43.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:43.0) Gecko/20100101 Firefox/43.0 SeaMonkey/2.40
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:47.0) Gecko/20100101 Firefox/47.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:48.0) Gecko/20100101 Firefox/48.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:49.0) Gecko/20100101 Firefox/49.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:50.0) Gecko/20100101 Firefox/50.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:51.0) Gecko/20100101 Firefox/51.0 SeaMonkey/2.48
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:52.0) Gecko/20100101 Firefox/52.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:54.0) Gecko/20100101 Firefox/54.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:55.0) Gecko/20100101 Firefox/55.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:56.0) Gecko/20100101 Firefox/56.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:56.0) Gecko/20100101 Firefox/56.0.4 Waterfox/56.0.4
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.3
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.4
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:57.0) Gecko/20100101 Firefox/57.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:58.0) Gecko/20100101 Firefox/58.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:59.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:61.0) Gecko/20100101 Firefox/61.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:49.0) Gecko/20100101 Firefox/49.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:50.0) Gecko/20100101 Firefox/50.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:51.0) Gecko/20100101 Firefox/51.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:52.0) Gecko/20100101 Firefox/52.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:53.0) Gecko/20100101 Firefox/53.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:54.0) Gecko/20100101 Firefox/54.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:56.0) Gecko/20100101 Firefox/56.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:57.0) Gecko/20100101 Firefox/57.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:58.0) Gecko/20100101 Firefox/58.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:59.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:59.0.2) Gecko/20100101 Firefox/59.0.2
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:61.0) Gecko/20100101 Firefox/61.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:61.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:64.0) Gecko/20100101 Firefox/64.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:40.0) Gecko/20100101 Firefox/40.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:48.0) Gecko/20100101 Firefox/48.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:52.0) Gecko/20100101 Firefox/52.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.3
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:55.0) Gecko/20100101 Firefox/55.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0) Gecko/20100101 Firefox/56.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0) Gecko/20100101 Firefox/56.0.4 Waterfox/56.0.4
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.3
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.4
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:57.0) Gecko/20100101 Firefox/57.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:57.0) Gecko/20100101 Firefox/99.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:58.0) Gecko/20100101 Firefox/58.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:59.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:61.0) Gecko/20100101 Firefox/61.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:64.0) Gecko/20100101 Firefox/64.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:65.0) Gecko/20100101 Firefox/65.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.2
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:57.0) Gecko/20100101 Firefox/57.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:61.0) Gecko/20100101 Firefox/61.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:64.0) Gecko/20100101 Firefox/64.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:40.0) Gecko/20100101 Firefox/40.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:45.0) Gecko/20100101 Firefox/45.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:47.0) Gecko/20100101 Firefox/47.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:48.0) Gecko/20100101 Firefox/48.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.8.3
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:44.0) Gecko/20100101 Firefox/44.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:45.0) Gecko/20100101 Firefox/45.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:47.0) Gecko/20100101 Firefox/47.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:48.0) Gecko/20100101 Firefox/48.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:49.0) Gecko/20100101 Firefox/49.0.2.1 Waterfox/49.0.2.1
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:45.0) Gecko/20100101 Firefox/45.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:48.0) Gecko/20100101 Firefox/48.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:56.0) Gecko/20100101 Firefox/56.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:41.0) Gecko/20100101 Firefox/41.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:47.0) Gecko/20100101 Firefox/47.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:48.0) Gecko/20100101 Firefox/48.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:52.0) Gecko/20100101 Firefox/52.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.1
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:55.0) Gecko/20100101 Firefox/55.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:56.0) Gecko/20100101 Firefox/56.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:56.0) Gecko/20100101 Firefox/56.0.1 Waterfox/56.0.1
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:57.0) Gecko/20100101 Firefox/57.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:58.0) Gecko/20100101 Firefox/58.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:59.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:61.0) Gecko/20100101 Firefox/61.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1; rv:50.0) Gecko/20100101 Firefox/50.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1; rv:55.0) Gecko/20100101 Firefox/55.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2; rv:49.0) Gecko/20100101 Firefox/49.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.102 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.1 Safari/605.1.15
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.2 Safari/605.1.15
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.1 Safari/605.1.15
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.1 Safari/605.1.15
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3639.1 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.2 Safari/605.1.15
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_29_81; rv:45.70.23) Gecko/20134284 Firefox/45.70.23
Mozilla/5.0 (Macintosh; Intel Mac OS X 11.11; rv:51.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 9.3; rv:45.0) Gecko/20100101 Firefox/57.0
Mozilla/5.0 (Macintosh; Intel Mac OS X 9.3; rv:45.0) Gecko/20100101 Firefox/59.0.2
Mozilla/5.0 (Macintosh; PPC Mac OS X 10.11; rv:46.0) Gecko/20100101 Firefox/46.0
Mozilla/5.0 (Macintosh; PPC Mac OS X 10.12; rv:46.0) Gecko/20100101 Firefox/46.0
Mozilla/5.0 (Macintosh; PPC Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0
Mozilla/5.0 (Macintosh; PPC Mac OS X 10.4; FPR7; rv:45.0) Gecko/20100101 Firefox/45.0 TenFourFox/G5
Mozilla/5.0 (Macintosh; PPC Mac OS X 10.4; FPR8; rv:45.0) Gecko/20100101 Firefox/45.0 TenFourFox/G5
Mozilla/5.0 (Macintosh; PPC Mac OS X 10.4; FPR9; rv:45.0) Gecko/20100101 Firefox/45.0 TenFourFox/G5
Mozilla/5.0 (Macintosh; PPC Mac OS X 10.5; FPR8; rv:45.0) Gecko/20100101 Firefox/45.0 TenFourFox/7450
Mozilla/5.0 (Macintosh; PPC Mac OS X 10.8; rv:47.0) Gecko/20100101 Firefox/47.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.10; rv:59.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.10; rv:64.0) Gecko/20100101 Firefox/64.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.10; rv:65.0) Gecko/20100101 Firefox/65.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.11; rv:59.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.11; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.11; rv:61.0) Gecko/20100101 Firefox/61.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.11; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.12; rv:54.0) Gecko/20100101 Firefox/54.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.12; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.12; rv:61.0) Gecko/20100101 Firefox/61.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.12; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.13; rv:59.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.13; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36
Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.85 Safari/537.36
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:20.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 Firefox/45.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:40.0) Gecko/20100101 IceDragon/40.1.1.18 Firefox/40.0.2
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:42.0) Gecko/20100101 Firefox/42.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0 Framafox/43.0.1
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0 SeaMonkey/2.40
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:45.63.16) Gecko/20175595 Firefox/45.63.16
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:48.0) Gecko/20100101 Firefox/48.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0 SeaMonkey/2.46
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/45.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/47.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/64.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.9.1
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.1
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.2
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.2 Lightning/5.4
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.3
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.4
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0 Zotero/5.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Firefox/52.9
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.6.2
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.7.2
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.8.2
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.8.3
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.1
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.2
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.3
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180927
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.0.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.0.0a2
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.0.1
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.1.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/50.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0 SeaMonkey/2.49.3
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:56.0) Gecko/20100101 Firefox/57.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:57.0) Gecko/20100101 Firefox/57.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:58.0) Gecko/20100101 Firefox/58.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:58.0) Gecko/20100101 Firefox/58.0 IceDragon/58.0.1
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:59.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0 IceDragon/60.0.2
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.9) Gecko/20100101 Goanna/4.1 Firefox/60.9 PaleMoon/28.2.1
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:61.0) Gecko/20100101 Firefox/61.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:61.0) Gecko/20100101 Firefox/61.0 IceDragon/61.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0 IceDragon/62.0.2
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:64.0) Gecko/20100101 Firefox/64.0
Mozilla/5.0 (Windows NT 10.0; WOW64; rv:65.0) Gecko/20100101 Firefox/65.0
Mozilla/5.0 (Windows NT 10.0; Win64; rv:54.0) Gecko/20100101 Firefox/54.0
Mozilla/5.0 (Windows NT 10.0; Win64; rv:55.0) Gecko/20100101 Firefox/55.0
Mozilla/5.0 (Windows NT 10.0; Win64; rv:59.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Windows NT 10.0; Win64; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Windows NT 10.0; Win64; rv:61.0) Gecko/20100101 Firefox/61.0
Mozilla/5.0 (Windows NT 10.0; Win64; rv:61.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Windows NT 10.0; Win64; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Windows NT 10.0; Win64; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:41.0) Gecko/20100101 Firefox/41.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:43.0) Gecko/20100101 Firefox/43.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:43.0) Gecko/20100101 Firefox/43.0.4 Waterfox/43.0.4
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:45.0) Gecko/20100101 Firefox/45.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:46.0) Gecko/20100101 Firefox/46.0.1 Waterfox/46.0.1
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:49.0) Gecko/20100101 Firefox/49.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:51.0) Gecko/20100101 Firefox/51.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.0.4
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.5.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.5.2
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.7.2
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.7.4
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.8.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.9.1
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0.2 Waterfox/52.0.2
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.3 Firefox/52.9 PaleMoon/27.5.1
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.8.3
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.1
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.2
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.3
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.4
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180424
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180515
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180601
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180718
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180905
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 Basilisk/20180927
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.0.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.0.0.1
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.0.1
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.1.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0.1 Waterfox/54.0.1
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0.1 Waterfox/56.0.1
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0.4 Waterfox/56.0.4
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.3
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.4
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0; Waterfox) Gecko/20100101 Firefox/56.2.5
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0
Mozilla/5.0 (Windows NT 10.0; rv:44.0) Gecko/20100101 Firefox/44.0.1
Mozilla/5.0 (Windows NT 10.0; rv:45.0) Gecko/20100101 Firefox/45.0
Mozilla/5.0 (Windows NT 10.0; rv:47.0) Gecko/20100101 Firefox/47.0
Mozilla/5.0 (Windows NT 10.0; rv:49.0) Gecko/20100101 Firefox/49.0
Mozilla/5.0 (Windows NT 10.0; rv:50.0) Gecko/20100101 Firefox/50.0
Mozilla/5.0 (Windows NT 10.0; rv:51.0) Gecko/20100101 Firefox/51.0
Mozilla/5.0 (Windows NT 10.0; rv:52.0) Gecko/20100101 Firefox/52.0
Mozilla/5.0 (Windows NT 10.0; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.7.2
Mozilla/5.0 (Windows NT 10.0; rv:52.0) Gecko/20100101 Firefox/52.0 Cyberfox/52.9.1
Mozilla/5.0 (Windows NT 10.0; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.4
Mozilla/5.0 (Windows NT 10.0; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.1
Mozilla/5.0 (Windows NT 10.0; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.1a1
Mozilla/5.0 (Windows NT 10.0; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.3
Mozilla/5.0 (Windows NT 10.0; rv:52.9) Gecko/20100101 Goanna/4.1 Firefox/52.9 PaleMoon/28.1.0
Mozilla/5.0 (Windows NT 10.0; rv:53.0) Gecko/20100101 Firefox/53.0
Mozilla/5.0 (Windows NT 10.0; rv:55.0) Gecko/20100101 Firefox/55.0
Mozilla/5.0 (Windows NT 10.0; rv:56.0) Gecko/20100101 Firefox/56.0
Mozilla/5.0 (Windows NT 10.0; rv:57.0) Gecko/20100101 Firefox/57.0
Mozilla/5.0 (Windows NT 10.0; rv:58.0) Gecko/20100101 Firefox/58.0
Mozilla/5.0 (Windows NT 10.0; rv:59.0) Gecko/20100101 Firefox/59.0
Mozilla/5.0 (Windows NT 10.0; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Windows NT 10.0; rv:61.0) Gecko/20100101 Firefox/61.0
Mozilla/5.0 (Windows NT 10.0; rv:62.0) Gecko/20100101 Firefox/62.0
Mozilla/5.0 (Windows NT 10.0; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Windows NT 10.0; rv:64.0) Gecko/20100101 Firefox/64.0
Mozilla/5.0 (Windows NT 4.0; rv:52.0) Gecko/20100101 Firefox/52.0
Mozilla/5.0 (Windows NT 5.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0
Mozilla/5.0 (Windows NT 5.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0
Mozilla/5.0 (Windows NT 5.1; WOW64; rv:61.0) Gecko/20100101 Firefox/61.0
Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36
Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0
Mozilla/5.0 (Windows NT 6.1; WOW64; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0
Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0
Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (Windows NT 6.1; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36
Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (X11; CrOS x86_64 11021.81.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.67 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/70.0.3538.77 Chrome/70.0.3538.77 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0
Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0
Mozilla/5.0 (X11; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (X11; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0
Mozilla/5.0 (X11; OpenBSD amd64; rv:56.0) Gecko/20100101 Firefox/66.0
Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0
Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:64.0) Gecko/20100101 Firefox/64.0