This commit is contained in:
TheTechRobo 2022-04-01 21:16:56 -04:00
commit 5c11653d32
3 changed files with 69 additions and 0 deletions

Binary file not shown.

3
raw_data.py Normal file
View File

@ -0,0 +1,3 @@
connection_init = r"""{"type":"connection_init","payload":{"Authorization":"Bearer 453755016066-KVjVvAltNXo2xrQ0noKgPPRSe0p9Jw"}}"""
id1 = r"""{"id":"1","type":"start","payload":{"variables":{"input":{"channel":{"teamOwner":"AFD2022","category":"CONFIG"}}},"extensions":{},"operationName":"configuration","query":"subscription configuration($input: SubscribeInput!) {\n subscribe(input: $input) {\n id\n ... on BasicMessage {\n data {\n __typename\n ... on ConfigurationMessageData {\n colorPalette {\n colors {\n hex\n index\n __typename\n }\n __typename\n }\n canvasConfigurations {\n index\n dx\n dy\n __typename\n }\n canvasWidth\n canvasHeight\n __typename\n }\n }\n __typename\n }\n __typename\n }\n}\n"}}"""
id2 = r"""{"id":"2","type":"start","payload":{"variables":{"input":{"channel":{"teamOwner":"AFD2022","category":"CANVAS","tag":"0"}}},"extensions":{},"operationName":"replace","query":"subscription replace($input: SubscribeInput!) {\n subscribe(input: $input) {\n id\n ... on BasicMessage {\n data {\n __typename\n ... on FullFrameMessageData {\n __typename\n name\n timestamp\n }\n ... on DiffFrameMessageData {\n __typename\n name\n currentTimestamp\n previousTimestamp\n }\n }\n __typename\n }\n __typename\n }\n}\n"}}"""

66
script.py Normal file
View File

@ -0,0 +1,66 @@
import asyncio, websockets,time,json, requests, rethinkdb, aiohttp
from rethinkdb import r
import raw_data
r.set_loop_type("asyncio")
STOP = False
class PlaceScraper:
db = "place"
imgtable = "img"
wstable = "ws"
async def add_to_db(self, db, table, data):
while True:
try:
conn = await r.connect()
return await r.db(db).table(table).insert(data).run(conn)
except rethinkdb.errors.ReqlDriverError as ename:
print(f"Retrying DB transaction... ({ename})")
await asyncio.sleep(2)
async def download_and_add_to_db(self, url, sesh):
while True:
try:
async with sesh.get(url) as resp:
data = await resp.read()
except Exception as ename:
print(f"Retrying {url} ({ename})...")
await asyncio.sleep(10)
else:
break
return await self.add_to_db(
self.db,
self.imgtable,
{"time": time.time(), "url": url, "data": data}
)
async def run_forever(self):
connector = aiohttp.TCPConnector(limit=25)
async with websockets.connect("wss://gql-realtime-2.reddit.com/query", extra_headers=[["Origin", "https://hot-potato.reddit.com"]]) as websocket:
await websocket.send(raw_data.connection_init)
await websocket.send(raw_data.id1)
await websocket.send(raw_data.id2)
async with aiohttp.ClientSession(connector=connector) as sesh:
while True:
response = await websocket.recv()
print(response)
await self.add_to_db(self.db, self.wstable, {"time":time.time(), "data": response})
try:
await self.download_and_add_to_db(json.loads(response)["payload"]["data"]["subscribe"]["data"]["name"], sesh=sesh) #dude why is it so LONG
except KeyError:
# probably one of the first few responses; skip
pass
if STOP:
break #, drop and roll
print("\tSTOP: Exited gracefully after CtrlC.")
client = PlaceScraper()
while True:
try:
asyncio.run(client.run_forever())
except (requests.exceptions.SSLError, requests.exceptions.ConnectionError, ConnectionResetError) as ename:
print(f"Caught {ename}")
except KeyboardInterrupt:
STOP = True
if STOP:
print("\tSTOP: Exited gracefully after CtrlC.")
break