# Source code for augraphy.utilities.figsharedownloader
import json
import os
import random
import shutil
from urllib.request import urlretrieve
import requests
from requests.exceptions import HTTPError
[docs]
class FigshareDownloader:
    """Makes HTTP requests for images on Figshare.

    Downloaded files are stored in ``save_dir``, which is ``directory``
    joined onto the working directory at construction time.

    :param directory: folder in which downloaded files are saved
    :type directory: string, optional
    """

    def __init__(self, directory="figshare/"):
        # os.path.join drops the cwd prefix when ``directory`` is absolute.
        self.save_dir = os.path.join(os.getcwd(), directory)

    def make_files_url(self, article_id):
        """Form the full URL for requests.

        :param article_id: ID of the Figshare article
        :type article_id: string
        :return: URL of the article's file listing endpoint
        :rtype: string
        """
        return f"https://api.figshare.com/v2/articles/{article_id}/files"

    def make_save_dir(self):
        """Create ``save_dir`` (and any missing parents) if necessary."""
        # Don't throw errors if we download stuff multiple times
        os.makedirs(self.save_dir, exist_ok=True)

    def send_request(self, url, headers):
        """Request Figshare data.

        :param url: request endpoint
        :type url: string
        :param headers: header info for request
        :type headers: dictionary
        :return: the decoded JSON body, or the raw response bytes when the
            body is not valid JSON
        :raises HTTPError: if the response status indicates an error; the
            error and the response body are printed before re-raising
        """
        response = requests.request("GET", url, headers=headers, data=None)
        try:
            response.raise_for_status()
        except HTTPError as error:
            print(f"HTTP Error: {error}")
            print(f"Response Body:\n {response.text}")
            raise

        try:
            return json.loads(response.text)
        except ValueError:
            # Not JSON (e.g. binary payload): hand back the raw bytes.
            return response.content

    def list_article_files(self, article_id):
        """Get the file listing of a Figshare article.

        :param article_id: ID of the Figshare article
        :type article_id: string
        :return: whatever ``send_request`` returns for the files endpoint;
            the download methods of this class treat it as a list of
            dictionaries with ``"name"`` and ``"download_url"`` keys
        """
        request_url = self.make_files_url(article_id)
        request_header = {"Content-Type": "application/json"}
        return self.send_request(request_url, headers=request_header)

    def download_file_by_id(self, file_id, file_name=None):
        """Download a single file using its unique identifier,
        and optionally rename it.

        :param file_id: ID of the Figshare file
        :type file_id: string
        :param file_name: name to give the file inside ``save_dir``; when
            omitted, the name of the temporary download file is kept
        :type file_name: string, optional
        """
        # Make ./figshare/ if not available
        self.make_save_dir()
        local_file, _headers = urlretrieve(
            f"https://figshare.com/ndownloader/files/{file_id}",
        )
        if file_name is None:
            # urlretrieve saves to a temporary file whose directory depends
            # on the platform (not always "/tmp/"), so keep only the final
            # path component instead of slicing off a fixed-length prefix.
            file_name = os.path.basename(local_file)
        shutil.move(local_file, os.path.join(self.save_dir, file_name))

    def _save_listed_file(self, file_dict):
        """Download one entry of an article file listing into ``save_dir``."""
        # The name comes from the remote response; keep only its final
        # component so "../" segments or absolute paths cannot place the
        # file outside save_dir.
        safe_name = os.path.basename(file_dict["name"])
        urlretrieve(
            file_dict["download_url"],
            os.path.join(self.save_dir, safe_name),
        )

    def download_all_files_from_article(self, article_id):
        """Download every file in article_id.

        :param article_id: ID of the Figshare article
        :type article_id: string
        """
        # Get list of dictionaries of file info
        file_list = self.list_article_files(article_id)
        # Make ./figshare/ if not available
        self.make_save_dir()
        # Save the files
        for file_dict in file_list:
            self._save_listed_file(file_dict)

    def download_random_file_from_article(self, article_id):
        """Randomly download single file in article_id.

        :param article_id: ID of the Figshare article
        :type article_id: string
        :raises ValueError: if the article lists no files
        """
        # Get list of dictionaries of file info
        file_list = self.list_article_files(article_id)
        if not file_list:
            raise ValueError(
                f"Figshare article {article_id} has no files to download",
            )
        # Make ./figshare/ if not available
        self.make_save_dir()
        # Save one file picked uniformly at random
        self._save_listed_file(random.choice(file_list))