Save some time
This commit is contained in:
parent
cbd257ef02
commit
867a616bed
11
pipeline.py
11
pipeline.py
|
@ -17,6 +17,7 @@ from seesaw.task import SimpleTask, LimitConcurrent
|
|||
import hashlib
|
||||
import shutil
|
||||
import socket
|
||||
import requests
|
||||
import sys
|
||||
import json
|
||||
import time
|
||||
|
@ -53,6 +54,15 @@ WGET_AT = find_executable(
|
|||
if not WGET_AT:
|
||||
raise Exception('No usable Wget+At found.')
|
||||
|
||||
class CheckItemExists(SimpleTask):
    """Pipeline task that fails an item early when its poll page is missing.

    The poll URL is built from ``item['item_name']``; any HTTP status other
    than 200 aborts the item before the later pipeline stages run.
    """

    # Upper bound (seconds) on the existence probe so a stalled connection
    # cannot block the pipeline forever; requests has no default timeout.
    REQUEST_TIMEOUT = 60

    def __init__(self):
        SimpleTask.__init__(self, 'CheckItemExists')

    def process(self, item):
        """Fetch the poll page for *item* and require an HTTP 200 response.

        Raises:
            AssertionError: if the response status code is not 200.
            requests.exceptions.RequestException: if the request itself
                fails (connection error, timeout, ...).
        """
        resp = requests.get(
            f"https://strawpoll.me/{item['item_name']}",
            timeout=self.REQUEST_TIMEOUT,
        )
        print(f"Got status code {resp.status_code}")
        # Raise explicitly instead of using ``assert``: assert statements are
        # stripped under ``python -O``, which would silently let missing
        # polls through. The exception type is kept for compatibility.
        if resp.status_code != 200:
            raise AssertionError("Poll not found. Save time, archive better.")
|
||||
|
||||
class CheckIP(SimpleTask):
|
||||
def __init__(self):
|
||||
SimpleTask.__init__(self, 'CheckIP')
|
||||
|
@ -205,6 +215,7 @@ pipeline = Pipeline(
|
|||
GetItemFromTracker('http://{}/{}'
|
||||
.format(TRACKER_HOST, TRACKER_ID),
|
||||
downloader, VERSION),
|
||||
CheckItemExists(),
|
||||
PrepareDirectories(warc_prefix='strawpool'),
|
||||
WgetDownload(
|
||||
WgetArgs(),
|
||||
|
|
Loading…
Reference in New Issue