Source code for augraphy.utilities.figsharedownloader

import json
import os
import random
import shutil
from urllib.request import urlretrieve

import requests
from requests.exceptions import HTTPError



[docs]
class FigshareDownloader:
    """Makes HTTP requests for images on Figshare

    Downloaded files are written to ``save_dir``, which is ``directory``
    joined onto the working directory that is current when the object is
    created.

    :param directory: folder that downloaded files are saved in
    :type directory: string, optional
    """

    def __init__(self, directory="figshare/"):
        # os.path.join keeps ``directory`` as-is when it is already absolute.
        self.save_dir = os.path.join(os.getcwd(), directory)

    def make_files_url(self, article_id):
        """Form the full URL for requests

        :param article_id: ID of the Figshare article
        :type article_id: string
        :return: Figshare API endpoint that lists the article's files
        :rtype: string
        """
        return f"https://api.figshare.com/v2/articles/{article_id}/files"

    def make_save_dir(self):
        """Create the save directory, including any missing parent folders."""
        # Don't throw errors if we download stuff multiple times
        os.makedirs(self.save_dir, exist_ok=True)

    def send_request(self, url, headers, timeout=None):
        """Request Figshare data

        :param url: request endpoint
        :type url: string
        :param headers: header info for request
        :type headers: dictionary
        :param timeout: forwarded unchanged to ``requests.request``; the
            default ``None`` applies no time limit
        :type timeout: float or tuple, optional
        :return: the decoded JSON body, or the raw response bytes when the
            body is not valid JSON
        :raises requests.exceptions.HTTPError: if the server answers with an
            error status; the error and response body are printed first
        """

        response = requests.request(
            "GET",
            url,
            headers=headers,
            data=None,
            timeout=timeout,
        )

        try:
            response.raise_for_status()
        except HTTPError as error:
            print(f"HTTP Error: {error}")
            print(f"Response Body:\n {response.text}")
            raise

        # Binary payloads do not parse as JSON, so hand those back untouched.
        try:
            return json.loads(response.text)
        except ValueError:
            return response.content

    def list_article_files(self, article_id):
        """Get the file listing of a Figshare article.

        :param article_id: ID of the Figshare article
        :type article_id: string
        :return: whatever ``send_request`` returns for the article's files
            endpoint; the download methods of this class iterate it as a list
            of dictionaries and read the ``name`` and ``download_url`` keys
        """

        request_url = self.make_files_url(article_id)
        request_header = {"Content-Type": "application/json"}
        return self.send_request(request_url, headers=request_header)

    def download_file_by_id(self, file_id, file_name=None):
        """Download a single file using its unique identifier,
        and optionally rename it.

        :param file_id: ID of the Figshare file
        :type file_id: string
        :param file_name: name to save the file under inside the save
            directory; when omitted, the name of the temporary file created
            by the download is kept
        :type file_name: string, optional
        """

        # Make ./figshare/ if not available
        self.make_save_dir()

        local_file, _ = urlretrieve(
            f"https://figshare.com/ndownloader/files/{file_id}",
        )

        if file_name is None:
            # The temporary directory is not always /tmp, so take the final
            # path component instead of slicing off a fixed-length prefix.
            file_name = os.path.basename(local_file)

        shutil.move(local_file, os.path.join(self.save_dir, file_name))

    def _save_listed_file(self, file_dict):
        """Download one entry of a file listing into the save directory."""
        # The name comes from the remote listing; keep only its final path
        # component so it cannot point outside the save directory.
        safe_name = os.path.basename(file_dict["name"])
        urlretrieve(
            file_dict["download_url"],
            os.path.join(self.save_dir, safe_name),
        )

    def download_all_files_from_article(self, article_id):
        """Download every file in article_id

        :param article_id: ID of the Figshare article
        :type article_id: string
        """

        # Get list of dictionaries of file info
        file_list = self.list_article_files(article_id)

        # Make ./figshare/ if not available
        self.make_save_dir()

        # Save the files
        for file_dict in file_list:
            self._save_listed_file(file_dict)

    def download_random_file_from_article(self, article_id):
        """Randomly download single file in article_id

        :param article_id: ID of the Figshare article
        :type article_id: string
        :raises ValueError: if the article's file listing is empty
        """

        # Get list of dictionaries of file info
        file_list = self.list_article_files(article_id)

        # Fail with a clear message before touching the filesystem.
        if not file_list:
            raise ValueError(
                f"Figshare article {article_id} has no files to download",
            )

        # Make ./figshare/ if not available
        self.make_save_dir()

        # Save one file picked uniformly at random
        self._save_listed_file(random.choice(file_list))