I am trying to scrape data from Missouri's COVID demographics data from this site and put it into RStudio. Also, this is the Tableau URL I found from the HTML: https://results.mo.gov/t/COVID19/views/Demographics/Public-Demographics?:embed=y&:showVizHome=no&:host_url=https%3A%2F%2Fresults.mo.gov%2F&:embed_code_version=3&:tabs=no&:toolbar=no&:showAppBanner=false&:%E2%80%98iframeSizedToWindow%E2%80%99=%E2%80%98true%E2%80%99&:dataDetails=no&:display_spinner=no&:loadOrderID=0
I've been using this forum here for guidance on how to scrape a public tableau dashboard into R but it doesn't seem to give me any rows or columns for my data frame.
Here is my code I tried to recreate:
library(rvest)
library(rjson)
library(httr)
library(stringr)
tableauHost <- "https://results.mo.gov"
url <- httr::modify_url(tableauHost,
path = "/t/COVID19/views/Demographics/Public-Demographics",
query = list(":embed" = "y",
":showVizHome" = "no",
":host_url" = "https://results.mo.gov/",
":embed_code_version" = 3,
":tabs" = "no",
":toolbar" = "no",
":showAppBanner" = "false",
":'iframeSizedToWindow'" = "'true'",
":dataDetails" = "no",
":display_spinner" = "no",
":loadOrderID" = 0)
)
body <- read_html(url)
data <- body %>%
html_nodes("textarea#tsConfigContainer") %>%
html_text()
json <- fromJSON(data)
url <- modify_url(tableauHost, path = paste(json$vizql_root, "/bootstrapSession/sessions/", json$sessionid, sep =""))
resp <- POST(url, body = list(sheet_id = json$sheetId), encode = "form")
data <- content(resp, "text")
extract <- str_match(data, "\\d+;(\\{.*\\})\\d+;(\\{.*\\})")
info <- fromJSON(extract[1,1])
data <- fromJSON(extract[1,3])
worksheet <- "+ PCR by age"
columnsData <- data$secondaryInfo$presModelMap$vizData$presModelHolder$genPresModelMapPresModel$presModelMap[[worksheet]]$presModelHolder$genVizDataPresModel$paneColumnsData
i <- 1
result <- list();
for(t in columnsData$vizDataColumns){
if (is.null(t[["localBaseColumnName"]]) == FALSE) {
result[[i]] <- list(
localBaseColumnName = t[["localBaseColumnName"]],
valueIndices = columnsData$paneColumnsList[[t$paneIndices + 1]]$vizPaneColumns[[t$columnIndices + 1]]$valueIndices,
aliasIndices = columnsData$paneColumnsList[[t$paneIndices + 1]]$vizPaneColumns[[t$columnIndices + 1]]$aliasIndices,
dataType = t[["dataType"]],
stringsAsFactors = FALSE
)
i <- i + 1
}
}
dataFull = data$secondaryInfo$presModelMap$dataDictionary$presModelHolder$genDataDictionaryPresModel$dataSegments[["0"]]$dataColumns
buildOffset = function(x){
return(if(x>=0) 0 else -1)
}
data_index <- 1
name_index <- 1
frameData <- list();
frameNames <- c()
for(t in dataFull) {
for(index in result) {
if (t$dataType == "cstring"){
if (length(index$valueIndices) > 0) {
j <- 1
vector <- character(length(index$valueIndices))
for (it in index$valueIndices){
vector[j] <- t$dataValues[abs(it)+1]
j <- j + 1
}
frameData[[data_index]] <- vector
frameNames[[name_index]] <- paste(index$localBaseColumnName, "value", sep="-")
data_index <- data_index + 1
name_index <- name_index + 1
}
if (length(index$aliasIndices) > 0) {
j <- 1
vector <- character(length(index$aliasIndices))
for (it in index$aliasIndices){
vector[j] <- t$dataValues[abs(it) + buildOffset(it) + 1]
j <- j + 1
}
frameData[[data_index]] <- vector
frameNames[[name_index]] <- paste(index$localBaseColumnName, "alias", sep="-")
data_index <- data_index + 1
name_index <- name_index + 1
}
}
}
}
columnToKeep = c('[Student Aid Program Type]-value','[Student Aid Program]-value', '[:Measure Names]-alias', '[Multiple Values]-alias')
df <- NULL
for(i in 1:length(frameNames)){
if (frameNames[i] %in% columnToKeep){
df[frameNames[i]] <- frameData[i]
}
}
options(width = 1200) #for readability
df <- as.data.frame(df, stringsAsFactors = FALSE)
print(df)
but it is only giving me this as output
print(df)
data frame with 0 columns and 0 rows
I know the columnToKeep
part where we define the columns for the data frame will not match the data I'm looking for, so would the incorrect number of columns affect this? I've been trying to decipher the original code and implement it with the Missouri COVID's dashboard but seeing it makes me extremely confused everytime. Any help with this would be greatly appreciated!!