3

"I'm trying to use this code from How to read SharePoint Online (Office365) Excel files into Python specifically pandas with Work or School Account? answers but a get the XLRDError: Unsupported format, or corrupt file: Expected BOF record; found b'\r\n<!DOCT'. I think the problem is in the way im placing my path. Do anybody knows how to get this type of Sharepoint path, like in the example below?" The ones I get look more like this "https://company.sharepoint.com/sites/site/Shared%20Documents/Forms/AllItems.aspx"

#import all the libraries
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File 
import io
import pandas as pd

#target url taken from sharepoint and credentials
url = 'https://company.sharepoint.com/Shared%20Documents/Folder%20Number1/Folder%20Number2/Folder3/Folder%20Number4/Target_Excel_File_v4.xlsx?cid=_Random_letters_and_numbers-21dbf74c'
username = 'Dumby_account@company.com'
password = 'Password!'

ctx_auth = AuthenticationContext(url)
if ctx_auth.acquire_token_for_user(username, password):
  ctx = ClientContext(url, ctx_auth)
  web = ctx.web
  ctx.load(web)
  ctx.execute_query()
  print("Authentication successful")

response = File.open_binary(ctx, url)

#save data to BytesIO stream
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0) #set file object to start

#read excel file and each sheet into pandas dataframe 
df = pd.read_excel(bytes_file_obj, sheetname = None)
Hector Castro
  • 63
  • 1
  • 1
  • 6

2 Answers2

1

I did it by opening the file in desktop and going to file > info > Copy Path. This path should work.

Hector Castro
  • 63
  • 1
  • 1
  • 6
0

Looks like you are using the share link instead of file path. You need to copy the correct path. Here's how:

  1. Open the sharepoint folder
  2. Click on the 3 dots in the file and click on Details
  3. Scroll down and copy the Path the path should look something like: '/user/folder/Documents/Target_Excel_File_v4.xlsx'

Use the sharepoint url to authenticate and then use the copied path to open your binary file.

#import all the libraries
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File 
import io
import pandas as pd

#target url taken from sharepoint and credentials
url = 'https://company.sharepoint.com/user/folder'
path = '/user/folder/Documents/Target_Excel_File_v4.xlsx'
username = 'Dumby_account@company.com'
password = 'Password!'

ctx_auth = AuthenticationContext(url)
if ctx_auth.acquire_token_for_user(username, password):
  ctx = ClientContext(url, ctx_auth)
  web = ctx.web
  ctx.load(web)
  ctx.execute_query()
  print("Authentication successful")

response = File.open_binary(ctx, path)

#save data to BytesIO stream
bytes_file_obj = io.BytesIO()
bytes_file_obj.write(response.content)
bytes_file_obj.seek(0) #set file object to start

#read excel file and each sheet into pandas dataframe 
df = pd.read_excel(bytes_file_obj, sheet_name = None)
print(df)
Sprookd
  • 149
  • 2
  • 12