import os

from azureml.core import Workspace, Datastore, Dataset

workspace = Workspace.from_config()  # load the Azure ML workspace from config.json
adlsgen2_datastore_name = 'adlsgen2datastore'  # example name to register/look up the datastore under

account_name = os.getenv("ADLSGEN2_ACCOUNTNAME_62", "<storage account name>")  # ADLS Gen2 account name
tenant_id = os.getenv("ADLSGEN2_TENANT_62", "")  # tenant id of service principal
client_id = os.getenv("ADLSGEN2_CLIENTID_62", "")  # client id of service principal
client_secret = os.getenv("ADLSGEN2_CLIENT_SECRET_62", "")  # secret of service principal
try:
    adlsgen2_datastore = Datastore.get(workspace, adlsgen2_datastore_name)
    print("Found ADLS Gen2 datastore with name: %s" % adlsgen2_datastore_name)

    # Read a delimited file from the datastore into a tabular dataset
    datastore_paths = [(adlsgen2_datastore, 'path to data.csv')]
    dataset = Dataset.Tabular.from_delimited_files(path=datastore_paths)
    df = dataset.to_pandas_dataframe()
    display(df)

    # Register the dataframe back as a tabular dataset on the same datastore
    dataset = Dataset.Tabular.register_pandas_dataframe(
        df, adlsgen2_datastore, "<DataSetStep>", show_progress=True)
except Exception:
    # Datastore not found: register it using service principal credentials
    adlsgen2_datastore = Datastore.register_azure_data_lake_gen2(
        workspace=workspace,
        datastore_name=adlsgen2_datastore_name,
        filesystem='fs',               # name of the ADLS Gen2 filesystem (container)
        account_name=account_name,     # ADLS Gen2 account name
        tenant_id=tenant_id,           # tenant id of service principal
        client_id=client_id,           # client id of service principal
        client_secret=client_secret)   # secret of service principal
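
Once registered, the dataset can be pulled back by name in a later session instead of re-reading the CSV. A minimal sketch, assuming the placeholder name "<DataSetStep>" above was replaced with a real dataset name:

# Retrieve the registered tabular dataset by name and materialize it as a dataframe
registered = Dataset.get_by_name(workspace, name="<DataSetStep>")
df = registered.to_pandas_dataframe()
print(df.head())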
Reference: https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-data-transfer.ipynb