Just to put it out there, here's my solution for extracting data from the Dynamics Web API using R. You can have Dynamics auto-generate Fetch XML for any data you need in the "Advanced Find" view. This script uses that auto-generated Fetch XML to extract any data you like.
Warning: This Web API is very difficult to work with. It took a lot of research to put this together.

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#~~ Introduction
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Purpose: Download data from Microsoft Dynamics using a fetch XML request
# Created: 2/23/2021
# Modified: 10/1/2021
# Author: Ryan Bradley
#
# Resources on this topic:
# https://github.com/r-lib/httr/blob/master/R/oauth-token.r
# https://github.com/r-lib/httr/blob/master/demo/oauth2-azure.r
# https://learn.microsoft.com/en-us/azure/active-directory/develop/quickstart-register-app
# https://blog.r-hub.io/2021/01/25/oauth-2.0/
# https://learn.microsoft.com/en-us/powerapps/developer/data-platform/authenticate-oauth
# https://learn.microsoft.com/en-us/dynamics365/customerengagement/on-premises/developer/webapi/discover-url-organization-web-api
# https://learn.microsoft.com/en-us/powerapps/developer/data-platform/webapi/retrieve-and-execute-predefined-queries#use-custom-fetchxml
# https://community.dynamics.com/365/f/dynamics-365-general-forum/378416/resource-not-found-for-the-segment-error-getting-custom-entity-from-common-data-service-web-api
# https://learn.microsoft.com/en-us/dynamics365/customerengagement/on-premises/developer/introduction-entities#:~:text=The%20entities%20are%20used%20to,Engagement%20(on%2Dpremises).&text=An%20entity%20has%20a%20set,%2C%20Address%20%2C%20and%20OwnerId%20attributes.
# https://datascienceplus.com/accessing-web-data-json-in-r-using-httr/
# https://learn.microsoft.com/en-us/powerapps/developer/data-platform/authenticate-oauth
# https://stackoverflow.com/questions/3541711/url-encode-sees-ampersand-as-amp-html-entity
# https://www.inogic.com/blog/2019/04/handling-special-characters-while-executing-fetch-xml-programmatically-in-web-api-rest-call/
# https://stackoverflow.com/questions/3541711/url-encode-sees-ampersand-as-amp-html-entity
# https://truenorthit.co.uk/2014/07/dynamics-crm-paging-cookies-some-gotchas/
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#~~ First time set up and maintenance
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#...........................................................................................
#.. Encode your active directory user name and password with key ring
#...........................................................................................
# This is only relevant if you need to authenticate through a proxy.
#
# You will need to install the keyring package and set your active directory user name
# and password. You can remove the comments from the 4 lines below and run them to
# install the package and set your AD user name and password.
#
# You will also need to repeat these steps when your active directory information changes.
#
# install.packages("keyring")
# library(keyring)
# keyring::key_set("id")
# keyring::key_set("pw")
#
#...........................................................................................
#.. Get a token to authenticate with Microsoft Dynamics
#...........................................................................................
#
# 1. Log into the azure portal
# https://portal.azure.com/#home
# 2. Register a new app
# 3. Generate a client secret on the "Certificates & secrets" page. Save it for later.
# 4. Create an application scope on the "Expose an API" page.
# 5. Grant the app "user_impersonation" access to "Dynamics CRM (1)"
# 6. Meet with an Active Directory IT administrator, and have them click
# "Grant Admin consent for Consumers Energy" on the "API Permissions" page of your app.
#
# If all those steps worked, you should now be able to authenticate using the code below.
#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#~~ Hard-coded variables
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Define global setting for importing strings as factors
options(stringsAsFactors = FALSE)
# Cache OAuth tokens to disk (.httr-oauth) so repeated runs can reuse them.
# TRUE is spelled out instead of the shorthand T, which can be reassigned.
options(httr_oauth_cache = TRUE)
#...........................................................................................
#.. Azure App Data
#...........................................................................................
# Found here: https://portal.azure.com/#home
# Azure app ID
# Source: "Overview" tab of your application in the Azure portal
# Azure app ID
# Source: "Overview" tab of your application in the Azure portal
client_id <- "YOUR ID"
# App name
# Source: "Overview" tab of your application in the Azure portal
app_name <- "MY_APP" # not important for authorization grant flow
# Secret ID
# Source: "Clients & Secrets" tab of your application in the Azure portal
client_secret <- "YOUR SECRET"
# Application ID URI. This is used as the OAuth scope below, so it must not
# contain stray whitespace (the original value had a trailing space).
# Source: "Expose an API" tab of your application in the Azure portal
application_id_uri <- "YOUR URI"
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#~~ Load or install packages
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Load or install librarian, which is then used to load/install everything else
if (!require(librarian)) {
  install.packages("librarian")
  if (!require(librarian)) {
    stop("Unable to install and load librarian")
  }
}
# Load multiple packages using the librarian package
librarian::shelf(tidyverse, readxl, RODBC, lubridate, httr, XML, jsonlite, rlist, httpuv, quiet = TRUE)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#~~ Set up global httr proxy configuration
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# This is only needed if you're connecting through a proxy
# set_config(use_proxy(url="yourproxy.com",port= 1234
# ,username=keyring::key_get("id")
# ,password=keyring::key_get("pw")
# )
# )
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#~~ Authenticate with Dynamics
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Use the default Azure OAuth endpoints
# httr ships with the Azure OAuth endpoint URLs built in
azure_endpoint <- oauth_endpoints("azure")
# Describe this script to Azure as an OAuth client application,
# using the registration details defined above
myapp <- oauth_app(appname = app_name, key = client_id, secret = client_secret)
# Step through the authorization chain:
# 1. You will be redirected to you authorization endpoint via web browser.
# 2. Once you responded to the request, the endpoint will redirect you to
# the local address specified by httr.
# 3. httr will acquire the authorization code (or error) from the data
# posted to the redirect URI.
# 4. If a code was acquired, httr will contact your authorized token access
# endpoint to obtain the token.
# Request an OAuth 2.0 access token via the authorization-code flow.
# cache = TRUE stores the token in .httr-oauth so later runs can reuse it.
# (The original passed `cache = str_cache`, but str_cache is never defined
# anywhere in this script, so the call would fail with an "object not found"
# error before ever reaching Azure.)
mytoken <- oauth2.0_token(azure_endpoint,
                          myapp,
                          scope = application_id_uri,
                          cache = TRUE
)
# Stop with a readable, multi-line message if Azure returned an error payload
# instead of a token
if (("error" %in% names(mytoken$credentials)) && (nchar(mytoken$credentials$error) > 0)) {
  errorMsg <- paste("Error while acquiring token.",
                    paste("Error message:", mytoken$credentials$error),
                    paste("Error description:", mytoken$credentials$error_description),
                    paste("Error code:", mytoken$credentials$error_codes),
                    sep = "\n"
  )
  stop(errorMsg)
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#~~ Begin making requests
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#...........................................................................................
#.. Dynamics API data
#...........................................................................................
# The URL of the Dynamics instance you'd like to connect to
# Root URL of the Dynamics Web API instance to query
base_url <- "https://YOURPLATFORM.dynamics.com/api/data/v9.2/"
# Query-string fragment appended to the entity URL when submitting a
# fetchXml request through the Web API
url_add_on <- "?fetchXml="
# Fetch XML downloaded from Microsoft Dynamics using the "Advanced Find" view.
# The auto-generated XML often benefits from hand-editing -- for example, give
# each link-entity a legible alias, as done below.
#
# Escape every double quote before pasting the XML into this string
# (e.g. in Notepad, find and replace):
# " -> \"
#
# Make sure the "order attribute" element is unique for every row in the data
# set. If it's not, it may cause issues when pulling data with more than
# 5,000 rows (paging relies on a stable sort order).
xml <- "<fetch version=\"1.0\" output-format=\"xml-platform\" mapping=\"logical\" distinct=\"false\">
<entity name=\"amendments\">
<attribute name=\"seasonyear\" />
<attribute name=\"enrollmenttype\" />
<attribute name=\"effectivestartdate\" />
<attribute name=\"effectiveenddate\" />
<attribute name=\"contractstartdate\" />
<attribute name=\"contractenddate\" />
<attribute name=\"program\" />
<attribute name=\"programduration\" />
<attribute name=\"programtype\" />
<attribute name=\"kwnominations\" />
<attribute name=\"renewalkw\" />
<attribute name=\"newkw\" />
<attribute name=\"account\" />
<attribute name=\"ownerid\" />
<attribute name=\"amendmentsid\" />
<attribute name=\"statuscode\" />
<attribute name=\"statecode\" />
<attribute name=\"amendment\" />
<attribute name=\"name\" />
<order attribute=\"amendmentsid\" descending=\"false\" />
<link-entity name=\"salesorder\" from=\"salesorderid\" to=\"order\" visible=\"false\" link-type=\"outer\" alias=\"sales\">
<attribute name=\"datecontractapproved\" />
</link-entity>
<link-entity name=\"account\" from=\"accountid\" to=\"account\" visible=\"false\" link-type=\"outer\" alias=\"account\">
<attribute name=\"accountmanager\" />
<attribute name=\"accountnumber\" />
<attribute name=\"statecode\" />
</link-entity>
</entity>
</fetch>"
# Percent-encode the XML so it can travel inside the query string
url_xml <- URLencode(xml)
# The Dynamics entity (= table) to query; attributes are its columns.
# NOTE: MUST BE PLURAL. If your entity is "contact" then put "contacts" and if
# your entity already ends in "s" try adding "es." Example -> amendments -> amendmentses
#
# You should be able to find the entity name in the first or 2nd line of an
# auto-generated XML. Example: <entity name=\"amendments\">
dynamics_entity <- "amendmentses"
# Assemble the full Web API query URL
url_fetch <- paste(base_url, dynamics_entity, url_add_on, url_xml, sep = "")
# url_fetch
# Send GET request.
# Send the fetchXml query to the Web API. The Prefer header asks Dynamics to
# return formatted (human-readable) values along with a paging cookie when
# the result set spans more than one page.
resp <- GET(
  url = url_fetch,
  config(token = mytoken),
  add_headers(Prefer = "odata.include-annotations=\"*\"")
)
#...........................................................................................
#.. Check for a valid API response
#...........................................................................................
if(http_error(resp) == TRUE){
print("Authentication error, unable to proceed.")
} else {
# Turn the raw response bytes into a JSON string, then parse it into a list
resp_json <- rawToChar(resp$content)
resp_list <- fromJSON(resp_json)
# The "value" element holds the actual rows as a data frame
df_data_raw <- resp_list[["value"]]
# Grab the paging cookie. Dynamics only includes one when the query has more
# than one page of results -- a query is capped at 5,000 rows per page, so any
# extra rows live on additional pages that must be requested separately.
paging_cookie_resp <- resp_list[["@Microsoft.Dynamics.CRM.fetchxmlpagingcookie"]]
# Check for a paging cookie; absence means the first page held everything
if (length(paging_cookie_resp) == 0) {
  print("No paging cookie returned, only one page of results.")
} else {
  print("Retrieving multiple pages of results.")
  # Page 1 was retrieved above; the loop below fetches page 2 onward
  page_number <- 1
  # Loop until the API returns a short (final) page or an error
  last_page_found <- FALSE
  while (last_page_found == FALSE) {
    #...........................................................................................
    #.. Retrieve multiple pages of results (only applies to data sets with > 5,000 rows)
    #...........................................................................................
    # The cookie value sits inside an XML attribute; splitting on double
    # quotes leaves the cookie itself in element 4
    lst_paging_cookie_resp <- str_split(paging_cookie_resp, "\"")
    # Retrieve the double-URL-encoded paging cookie
    encoded_paging_cookie <- lst_paging_cookie_resp[[1]][4]
    # The paging cookie is DOUBLE url-encoded, so it must be decoded TWICE.
    # (What a pain this was to figure out.)
    decoded_paging_cookie <- URLdecode(URLdecode(encoded_paging_cookie))
    # If the decoded cookie is wrapped in double quotes, strip the quote at
    # the beginning and end of the string
    decoded_paging_cookie <- str_remove(decoded_paging_cookie, "^\"")
    decoded_paging_cookie <- str_remove(decoded_paging_cookie, "\"$")
    # Escape XML special characters with their character-entity equivalents
    # so the cookie can be embedded back into fetch XML. The ampersand MUST
    # be escaped first, or the entities created below would be re-escaped.
    decoded_paging_cookie <- str_replace_all(decoded_paging_cookie, "&", "&amp;")
    decoded_paging_cookie <- str_replace_all(decoded_paging_cookie, "<", "&lt;")
    decoded_paging_cookie <- str_replace_all(decoded_paging_cookie, ">", "&gt;")
    decoded_paging_cookie <- str_replace_all(decoded_paging_cookie, "\"", "&quot;")
    # URI-encode the paging cookie so the API can receive it
    URI_encoded_paging_cookie <- encodeURIComponent(decoded_paging_cookie)
    # Increment the page number by 1
    page_number <- page_number + 1
    # Build the extra fetch-XML attributes (paging cookie + page number) in
    # URL-encoded form. The template ends with "distinct=" so it can be
    # spliced into the original URL-encoded XML by replacing that attribute.
    xml_header <- "paging-cookie=\"PutPagingCookieHere\" page=\"PutPageNumberHere\" distinct="
    url_encoded_xml_header <- URLencode(xml_header)
    # Splice in the URI-encoded paging cookie and page number
    url_encoded_xml_header <- url_encoded_xml_header %>%
      str_replace("PutPagingCookieHere", URI_encoded_paging_cookie) %>%
      str_replace("PutPageNumberHere", as.character(page_number))
    # Splice the paging information into the XML header of the original request
    new_url_xml <- str_replace(url_xml, "distinct=", url_encoded_xml_header)
    # Create a new Web API query URL with the updated XML data
    url_fetch <- paste0(base_url, dynamics_entity, url_add_on, new_url_xml)
    # Retrieve the next page of data
    resp <- GET(url = url_fetch
                , config(token = mytoken)
                , add_headers(Prefer = "odata.include-annotations=\"*\"") # Required to get legible text returned
    )
    # Check for an error returned in the response
    if (http_error(resp) == TRUE) {
      # Report which page actually failed (the old message always said "2nd")
      print(paste0("Error while retrieving page ", page_number, " of results, unable to proceed."))
      last_page_found <- TRUE
    } else {
      # Convert the raw content to a string and decode the JSON
      resp_json <- rawToChar(resp[["content"]])
      resp_list <- fromJSON(resp_json)
      # Extract this page's rows into a stand-alone data frame
      df_data_raw_next_page <- resp_list[["value"]]
      # The API only returns columns that hold data, so different pages can
      # disagree on columns. bind_rows() aligns columns by name and fills any
      # column missing from one page with NA. (Base rbind() has no use.names
      # argument -- passing one appended a bogus row of TRUEs that the old
      # code then had to filter back out via the @odata.etag column.)
      df_data_raw <- df_data_raw %>%
        bind_rows(df_data_raw_next_page) %>%
        distinct()
      # Extract the paging cookie for the next iteration (only present when
      # more pages remain)
      paging_cookie_resp <- resp_list[["@Microsoft.Dynamics.CRM.fetchxmlpagingcookie"]]
      # A short page (< 5,000 rows) means this was the last page of results
      if (nrow(df_data_raw_next_page) < 5000) {
        last_page_found <- TRUE
      } else {
        print(paste0("Page ", page_number, " retrieved, retrieving page ", page_number + 1))
      }
    }
  }
}
#...........................................................................................
#.. Clean the returned column names
#...........................................................................................
# The API returns 2 versions of each formatted column: a human-readable
# "FormattedValue" version and a raw version holding the GUID. Keep only the
# formatted version, drop metadata columns, and tidy the remaining names.
# Set the starting index to 1
i <- 1
# Loop over the columns in the data frame. The index is reset to the start
# whenever a column is removed, because removal shifts the positions of the
# remaining columns.
while (i <= length(names(df_data_raw))) {
  # Extract the current column name
  str_col_name <- names(df_data_raw)[i]
  # print(paste0("Cleaning column ", i, " - ", str_col_name))
  # Proceed only if we have a column name at this position
  if (!is.na(str_col_name)) {
    # Check for unwanted metadata columns we can remove
    condition_1 <- grepl("@Microsoft.Dynamics.CRM", str_col_name, ignore.case = TRUE)
    condition_2 <- grepl("Display.V1.AttributeName", str_col_name, ignore.case = TRUE)
    condition_3 <- grepl("@odata_etag", str_col_name, ignore.case = TRUE)
    condition_4 <- grepl("@odata.etag", str_col_name, ignore.case = TRUE)
    # Check to see if an unwanted column has been found
    if (condition_1 | condition_2 | condition_3 | condition_4) {
      # Remove the column
      df_data_raw <- df_data_raw %>%
        select(-all_of(str_col_name))
      # Reset the index since a column was removed
      i <- 0
    } else {
      # Check to see if it's a formatted column
      if (grepl("@OData.Community.Display.V1.FormattedValue", str_col_name, ignore.case = TRUE)) {
        # The base column name is the formatted name minus the long suffix
        str_base_col <- str_replace(str_col_name, "@OData.Community.Display.V1.FormattedValue", "")
        # Remove the raw (GUID) base column if it exists, keeping only the
        # formatted version
        df_data_raw <- df_data_raw %>%
          select(-all_of(str_base_col))
        # Reset the index since a column was removed
        i <- 0
      } else {
        str_base_col <- str_col_name
      }
      # Tidy the column name. NOTE: the old code removed the FIRST underscore
      # anywhere in the name (contrary to its own "prefix" comment); only a
      # leading underscore should be stripped.
      str_new_col <- str_replace(str_base_col, "^_", "")     # Remove "_" prefix
      str_new_col <- str_replace(str_new_col, "\\.", "_")    # Replace the first period with an underscore
      str_new_col <- str_replace(str_new_col, "_value$", "") # Remove "_value" suffix
      # Rename the old column name to the new one
      df_data_raw <- df_data_raw %>%
        rename(!!str_new_col := all_of(str_col_name))
      # If this is a character column with "date" in its name, attempt to
      # convert it to a date-type column
      if (grepl("date", str_new_col, ignore.case = TRUE) && typeof(df_data_raw[, i]) == "character") {
        print(paste0("Attempting to convert ", str_new_col, " to a date format."))
        # Attempt the month-day-year parse into a scratch column
        df_data_raw <- df_data_raw %>%
          mutate(attempt_mdy = mdy(!!as.symbol(str_new_col)))
        # Keep the parsed values only if at least one row converted
        if (any(!is.na(df_data_raw$attempt_mdy))) {
          df_data_raw <- df_data_raw %>%
            mutate(!!as.symbol(str_new_col) := attempt_mdy)
        }
        # Drop the scratch column
        df_data_raw <- df_data_raw %>%
          select(-attempt_mdy)
      }
    }
  }
  # Move up the index to the next column
  i <- i + 1
}
# Select the desired columns, renaming them to readable snake_case names.
# (Column order here determines the order of the final output.)
df_data <- df_data_raw %>%
  select(contract_account_number = account_accountnumber,
         program_duration = programduration,
         contract_start_date = contractstartdate,
         contract_end_date = contractenddate,
         season_year = seasonyear,
         state_code = statecode,
         status_code = statuscode,
         account,
         account_state_code = account_statecode,
         program,
         name,
         kw_new = newkw,
         kw_renewal = renewalkw,
         kw_nomination = kwnominations,
         account_manager = account_accountmanager,
         contract_approval_date = sales_datecontractapproved,
         enrollment_type = enrollmenttype,
         effective_start_date = effectivestartdate,
         effective_end_date = effectiveenddate,
         owner = ownerid,
         program_type = programtype,
         amendment,
         amendment_id = amendmentsid)
# Set numeric data types: strip the thousands-separator commas from the
# numeric-looking text columns, then cast each one to numeric
df_data <- df_data %>%
  mutate(across(c(season_year, kw_new, kw_renewal, kw_nomination),
                ~ as.numeric(gsub(",", "", .x))))
# Stamp every row with the time this extract was loaded
df_data$load_date_time <- Sys.time()
}