This script demonstrates some of the basics needed for downloading image files from an O365 SharePoint library to a local Linux directory using Python.
#!/usr/bin/env python
import os
import requests
# Application (client) ID shown on the Azure App registration's Overview page.
CLIENT_ID = '<client_id>'
# Client secret created on the App registration's Certificates & secrets page.
CLIENT_SECRET = '<client_secret>'
# OAuth grant type sent with the token request in get_access_token().
GRANT_TYPE = 'client_credentials'
# Local directory (relative path) that downloaded files are written to.
LOCAL_DIR = './images'
# OAuth scope sent with the token request in get_access_token().
SCOPE = 'https://graph.microsoft.com/.default'
# Directory (tenant) ID shown on the App registration's Overview page.
TENANT_ID = '<tenant_id>'
def process_sharepoint():
    """Download every file listed at the root of the site's "Images" library.

    Resolves the SharePoint site id from the host and site name, looks up the
    drive named "Images" among the site's drives, then walks the drive's root
    listing page by page, passing each page to ``download_the_files``.

    Raises:
        LookupError: If no site id is returned for the host/site name, or the
            site has no drive named "Images".
    """
    host_name = 'providence4.sharepoint.com'
    site_name = 'myHIway-data'
    library_name = 'Images'

    site_uri = f'https://graph.microsoft.com/v1.0/sites/{host_name}:/sites/{site_name}'
    site_id = make_request(site_uri).get('id')
    if not site_id:
        # Fail here instead of building follow-up URLs around a missing id.
        raise LookupError(
            f'No site id returned for {host_name}:/sites/{site_name}'
        )

    drive_uri = f'https://graph.microsoft.com/v1.0/sites/{site_id}/drives'
    drives = make_request(drive_uri)
    drive_id = ''
    for item in drives.get('value', []):
        if item.get('name') == library_name:
            drive_id = item.get('id')
    if not drive_id:
        # Without this check the listing URL would contain an empty drive id
        # and the run would finish silently without downloading anything.
        raise LookupError(
            f'No drive named "{library_name}" found on site {site_name}'
        )

    # Each response holds one page of results; follow @odata.nextLink until
    # the service stops returning one.
    items_uri = f'https://graph.microsoft.com/v1.0/sites/{site_id}/drives/{drive_id}/root/children'
    while items_uri:
        items = make_request(items_uri)
        download_the_files(items)
        items_uri = items.get('@odata.nextLink')
def get_access_token():
    """Request an OAuth 2.0 access token from Microsoft.

    Posts the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``GRANT_TYPE``
    and ``SCOPE`` settings to the v2.0 token endpoint of ``TENANT_ID``.

    Returns:
        The ``access_token`` value of the JSON response (``None`` if the
        response carries no such key).

    Raises:
        requests.HTTPError: If the token endpoint answers with an error
            status, e.g. for wrong credentials.
        requests.Timeout: If the endpoint does not answer within the timeout.
    """
    token_request_uri = f'https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token'
    data = {
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'grant_type': GRANT_TYPE,
        'scope': SCOPE,
    }
    # requests waits forever by default; bound the wait so the script cannot hang.
    resp = requests.post(token_request_uri, data=data, timeout=30)
    # Surface authentication failures here rather than sending "Bearer None" later.
    resp.raise_for_status()
    return resp.json().get('access_token')
def make_request(url):
    """GET ``url`` with a bearer token and return the decoded JSON body.

    Args:
        url: The URL to request.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    # NOTE: a new token is requested on every call via get_access_token().
    headers = {
        'Authorization': f'Bearer {get_access_token()}',
    }
    # requests waits forever by default; bound the wait so the script cannot hang.
    r = requests.get(url, headers=headers, timeout=30)
    # Stop on error responses instead of handing an error payload to the caller
    # as if it were the requested data.
    r.raise_for_status()
    return r.json()
def download_the_files(items):
    """Save each downloadable entry of one listing page into ``LOCAL_DIR``.

    Args:
        items: Decoded JSON listing. Its ``value`` list holds one dict per
            entry; the ``name`` and ``@microsoft.graph.downloadUrl`` keys of
            each entry are used.

    Entries that lack a name or a download URL are skipped (presumably
    folders and other non-file items; confirm against the library contents).

    Raises:
        requests.HTTPError: If a download answers with an error status.
        requests.Timeout: If a download does not answer within the timeout.
    """
    for entry in items.get('value', []):
        file_name = entry.get('name')
        download_url = entry.get('@microsoft.graph.downloadUrl')
        if not file_name or not download_url:
            # Nothing to fetch or nowhere to store it; requests.get(None)
            # would raise and abort the whole page.
            continue

        # Created on demand so the first write does not fail on a missing
        # directory; a no-op once the directory exists.
        os.makedirs(LOCAL_DIR, exist_ok=True)

        response = requests.get(download_url, timeout=60)
        # Do not write an error page to disk under the image's name.
        response.raise_for_status()

        save_to_path = os.path.join(LOCAL_DIR, file_name)
        with open(save_to_path, "wb") as f:
            f.write(response.content)
# Start the download only when this file is executed as a script.
if __name__ == "__main__":
    process_sharepoint()
Main process
Get the site id from a JSON file with all lists and libraries related to a host and site name.
site_id = make_request(site_uri).get('id', {})
Use the site id to get a JSON file with all the drives on the site.
drives = make_request(drive_uri)
Loop over the drives to get the drive id of a library by name, e.g. "Images"
drive_id = item.get('id')
Get a JSON list of the files on the library's drive.
items = make_request(items_uri)
Download each item on the list.
download_the_files(items)
Microsoft returns this list as a JSON file containing only 200 results at a time, so look for an @odata.nextLink key in each response for a link to the next page of results.
items_uri = items.get('@odata.nextLink')
Get access token from Microsoft
The get_access_token method makes an OAuth request to Microsoft using an Application (client) ID and a Client secret created in an Azure App registration Service Principal.
CLIENT_ID = The Application (client) ID on the Overview page
CLIENT_SECRET = The client secret created on the Certificates & secrets page
TENANT_ID = The Directory (tenant) ID on the Overview page
All CAPITALIZED settings should be added as secrets in a production environment.
Make a request
Simplify the code with a reusable make_request method that returns a JSON file.
r = requests.get(url, headers=headers)
Download the files
The download_the_files method downloads an image file using a @microsoft.graph.downloadUrl link provided by the JSON block for each image file. The LOCAL_DIR setting refers to the local Linux download directory relative to the Python script.