bilibili/wrapper.py

66 lines
2.2 KiB
Python
Raw Permalink Normal View History

import bilibili, json, requests
2022-03-25 21:22:44 +00:00
from alive_progress import alive_bar
2022-03-12 16:19:45 +00:00
from cprint import cprint
2022-08-20 18:16:21 +00:00
# Tell the operator up front that this script only issues HTTP requests;
# responses are discarded, so capturing them is left to the recording proxy.
cprint.warn("PLEASE NOTE:\n\twrapper.py is meant to be used with Warcprox. **NO DATA IS SAVED ANYWHERE.**")

# (dyn_id, label) pairs for album posts whose metadata still has to be fetched.
QUEUED_IMAGES = []
# (url, label) pairs for every URL requested in the final download pass.
QUEUED = []

PROFILE = input("Please enter the profile ID: ")

# Drain the user scraper purely for its side effects; the yielded values
# themselves are not used by this script.
for _ in bilibili.userScraper(PROFILE):
    pass
2022-03-25 21:22:44 +00:00
# Phase 1: walk the album pagination and remember the id of every image post.
cprint.info("Scraping album pagination for URLs...")
with alive_bar() as bar:
    for page, _ in bilibili.albumScraper(PROFILE):
        page_items = page["data"]["items"]
        for image in page_items:
            queued_post = (image["dyn_id"], "IMAGE_POST")
            QUEUED_IMAGES.append(queued_post)
            # Total is unknown up front, so the bar just counts posts found.
            bar()
2022-03-12 16:19:45 +00:00
2022-03-25 21:22:44 +00:00
def _post_image_urls(post):
    """Return the ``(url, label)`` pairs referenced by one post card.

    ``post`` is the ``["data"]["card"]`` mapping of a post scraper response.
    Optional parts (the attach card and the add-on cards) are skipped when
    their keys are absent; the author avatar, the pictures and the card
    owner's avatar are required, and a missing key there raises ``KeyError``.
    """
    urls = []
    display = post.get("display", {})

    try:
        urls.append((display["attach_card"]["cover_url"], "ATTACH_CARD_COVER_URL"))
    except KeyError:
        pass

    for addon_card in display.get("add_on_card_info", []):
        # Guard each card on its own: one add-on card without a cover must
        # not discard the covers of the cards that follow it.
        try:
            urls.append((addon_card["attach_card"]["cover_url"], "ADDON_CARD_COVER_URL"))
        except KeyError:
            continue

    urls.append((post["desc"]["user_profile"]["info"]["face"], "PFP"))

    # The inner "card" field is itself a JSON-encoded string.
    card = json.loads(post["card"])
    urls.extend((picture["img_src"], "IMAGE") for picture in card["item"]["pictures"])
    urls.append((card["user"]["head_url"], "PFP"))
    return urls


# Phase 2: fetch each queued post and queue every image URL it references.
cprint.info("Now downloading image metadata...")
with alive_bar(total=len(QUEUED_IMAGES)) as bar:
    for dyn_id, _label in QUEUED_IMAGES:
        post = bilibili.postScraper(dyn_id)["data"]["card"]
        QUEUED.extend(_post_image_urls(post))
        bar()
cprint.info("Finished image collection.")
# Phase 3: walk the article listing and queue every image URL the articles
# reference, both the regular and the "origin" variants.
cprint.info("Downloading articles...")
with alive_bar() as bar:
    for articles, page_name in bilibili.articleScraper(PROFILE):
        # Only entries tagged MASTER_LIST_* are read for image URLs here;
        # anything else the scraper yields is skipped.
        if not page_name.startswith("MASTER_LIST_"):
            continue
        for article in articles["data"]["articles"]:
            for url in article["image_urls"]:
                QUEUED.append((url, "ArticleImageUrl"))
                bar()
            for origin_url in article["origin_image_urls"]:
                # Queue the origin URL itself, not the stale loop variable
                # left over from the image_urls loop above.
                QUEUED.append((origin_url, "ArticleOriginImgUrl"))
                bar()
cprint.info("Finished article download.")
# Phase 4: request every queued URL. The responses are discarded on purpose;
# the point is only to make the requests happen.
cprint.info("Downloading queued images and posts...")
with alive_bar(dual_line=True, total=len(QUEUED)) as bar:
    for item, typee in QUEUED:
        # Show the URL currently being fetched together with its label.
        bar.text = f"{item} ({typee})"
        # NOTE(review): no timeout is set, so a stalled server blocks the run.
        requests.get(item)
        bar()