Download SharePoint files with Python

This script demonstrates some of the basics needed for downloading image files from an O365 SharePoint library to a local Linux directory using Python.

#!/usr/bin/env python

import os
import requests

# Azure App registration credentials. They are read from the environment when
# the variables are set, so real values never need to be written into this
# file; the original placeholders remain as the fallback.
CLIENT_ID = os.environ.get('AZURE_CLIENT_ID', '<client_id>')
CLIENT_SECRET = os.environ.get('AZURE_CLIENT_SECRET', '<client_secret>')
# OAuth 2.0 grant type sent to the token endpoint.
GRANT_TYPE = 'client_credentials'
# Local directory the downloaded files are written to.
LOCAL_DIR = './images'
# Scope requested for the access token.
SCOPE = 'https://graph.microsoft.com/.default'
TENANT_ID = os.environ.get('AZURE_TENANT_ID', '<tenant_id>')


def process_sharepoint(host_name='providence4.sharepoint.com',
                       site_name='myHIway-data',
                       library_name='Images'):
    """Download every item in the root of a SharePoint document library.

    Looks up the site id from the host and site name, finds the drive whose
    name equals ``library_name`` among the site's drives, then requests the
    drive's root children page by page and passes each page to
    ``download_the_files``.

    Args:
        host_name: SharePoint host name used in the Graph site lookup.
        site_name: Name of the site under ``/sites/`` on that host.
        library_name: Name of the drive (document library) to download.

    Raises:
        LookupError: If the site response has no ``id`` or no drive with the
            requested name is listed for the site.
    """
    site_uri = f'https://graph.microsoft.com/v1.0/sites/{host_name}:/sites/{site_name}'
    site_id = make_request(site_uri).get('id')
    if not site_id:
        # Fail here rather than build follow-up URLs around a missing id.
        raise LookupError(f'No site id returned for {site_uri}')

    drive_uri = f'https://graph.microsoft.com/v1.0/sites/{site_id}/drives'
    drives = make_request(drive_uri)
    drive_id = ''
    for item in drives.get('value', []):
        if item.get('name') == library_name:
            drive_id = item.get('id')
    if not drive_id:
        raise LookupError(f'No drive named "{library_name}" found at {drive_uri}')

    # Start at the first page, then follow @odata.nextLink until a response
    # no longer carries one.
    items_uri = f'https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root/children'
    while items_uri:
        items = make_request(items_uri)
        download_the_files(items)
        items_uri = items.get('@odata.nextLink')


def get_access_token():
    """Request an access token from the Microsoft token endpoint.

    Posts ``CLIENT_ID``, ``CLIENT_SECRET``, ``GRANT_TYPE`` and ``SCOPE`` as
    form data to the v2.0 token endpoint of ``TENANT_ID``.

    Returns:
        The ``access_token`` value of the JSON response, or ``None`` if the
        response body has no such key.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not answer within 30 seconds.
    """
    token_request_uri = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
    data = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'grant_type': GRANT_TYPE,
        'scope': SCOPE,
    }
    # requests has no default timeout; without one a stalled connection
    # would block the script forever.
    resp = requests.post(token_request_uri, data=data, timeout=30)
    # Surface bad credentials here instead of sending "Bearer None" later.
    resp.raise_for_status()
    return resp.json().get('access_token')


def make_request(url):
    """Send an authenticated GET request and return the decoded JSON body.

    A fresh token is obtained from ``get_access_token`` for every call and
    sent as a Bearer ``Authorization`` header.

    Args:
        url: The URL to request.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within 30 seconds.
    """
    headers = {
        'Authorization': 'Bearer {}'.format(get_access_token())
    }
    # requests has no default timeout; set one so a stalled call cannot hang.
    r = requests.get(url, headers=headers, timeout=30)
    # Raise on error statuses so an error payload is not treated as data.
    r.raise_for_status()
    return r.json()


def download_the_files(items):
    """Download each file listed in one page of drive items into LOCAL_DIR.

    Args:
        items: Decoded JSON page whose ``value`` list holds the drive items.
            Each item is read for its ``name`` and
            ``@microsoft.graph.downloadUrl`` keys.

    Raises:
        requests.HTTPError: If a download answers with an error status.
        requests.Timeout: If a download does not answer within 60 seconds.
    """
    for entry in items.get('value', []):
        file_name = entry.get('name')
        download_url = entry.get('@microsoft.graph.downloadUrl')

        # Entries without a download URL (e.g. folders) cannot be fetched;
        # skip them instead of passing None to requests.get().
        if not file_name or not download_url:
            continue

        # Create the target directory on demand so a fresh checkout works.
        os.makedirs(LOCAL_DIR, exist_ok=True)

        response = requests.get(download_url, timeout=60)
        # Do not write an error body to disk as if it were the file.
        response.raise_for_status()

        save_to_path = os.path.join(LOCAL_DIR, file_name)
        with open(save_to_path, "wb") as f:
            f.write(response.content)


# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    process_sharepoint()

Main process

Get the site id from a JSON file with all lists and libraries related to a host and site name.
site_id = make_request(site_uri).get('id', {})

Use the site id to get a JSON file with all the drives on the site.
drives = make_request(drive_uri)

Loop over the drives to get the drive id of a library by name, e.g. "Images"
drive_id = item.get('id')

Get a JSON list of the files on the library's drive.
items = make_request(items_uri)

Download each item on the list.
download_the_files(items)

Microsoft returns this list as a JSON response containing only 200 results at a time, so look for an @odata.nextLink key in each response for a link to the next page of results.
items_uri = items.get('@odata.nextLink')

Get access token from Microsoft

The get_access_token method makes an OAuth request to Microsoft using an Application (client) ID and a Client secret created in an Azure App registration Service Principal.

CLIENT_ID = The Application (client) ID on the Overview page
CLIENT_SECRET = The client secret created on the Certificates & secrets page
TENANT_ID = The Directory (tenant) ID on the Overview page

All CAPITALIZED settings should be added as secrets in a production environment.

Make a request

Simplify the code with a reusable make_request function that sends an authenticated GET request and returns the parsed JSON response.
r = requests.get(url, headers=headers)

Download the files

The download_the_files method downloads an image file using a @microsoft.graph.downloadUrl link provided by the JSON block for each image file. The LOCAL_DIR setting refers to the local Linux download directory relative to the Python script.