Problems with shape files

Question

I want to make a map of the number of people infected with coronavirus in each US state. The idea is to draw all the states and colour each one according to the range its case count falls in (e.g. 500-2000, etc.). The ranges should be represented by different shades of a single colour, with darker shades for the states that have more cases.

So this is my code:

install.packages("sp")
library(sp)

install.packages("sf")
library(sf)

install.packages("maptools")
library(maptools)

install.packages("spdep")
library(spdep)
install.packages("rgdal")
     library(rgdal)
install.packages("RColorBrewer")
library(RColorBrewer)
install.packages("readxl")
library(readxl)




# Read the state polygons from the shapefile, then list the attribute
# column names and print the attribute table stored in the @data slot.
shp_usa <- readOGR("USA_States.shp")
names(shp_usa)                              
shp_usa@data 

# Read the per-state case counts from the Excel workbook and list its columns.
infected <- read_excel("C:/Users/josem/OneDrive/Escritorio/infectedUS/CasesUS.xlsx") 
names(infected)      


# Copy the spatial object, then overwrite the copy with the merge result.
# NOTE(review): merge() is given shp_usa@data (the attribute table), not
# shp_usa, so usa_infected ends up as a plain data.frame without geometry.
# Both inputs have a Cases column, so the result has Cases.x and Cases.y.
usa_infected <- shp_usa
usa_infected <- merge(x= shp_usa@data,y= infected,by.x= "STATE_NAME",by.y="State",all.x = TRUE,sort  = FALSE)
summary(usa_infected)



# Map Cases by state USA
# Colour breaks are the quartiles of Cases.x; palette is 5 shades of "Reds".
# NOTE(review): the row filter uses slot access (usa_infected@Cases) on the
# merged data.frame; this is the expression the reported error points at.

    spplot(usa_infected[usa_infected@Cases > 0, ],"Cases.x", at = quantile(usa_infected$Cases.x, p = c(0, .25, .5, .75, 1), na.rm = TRUE), col.regions = brewer.pal(5, "Reds"), main = expression("Cases by State"))

But I have two problems. 1. I don't know exactly what to write for the by.x= and by.y= arguments in this part of the code in order to do the task:

# by.x names the key column in the first table (shp_usa@data) and by.y the
# matching key column in the second table (infected).
usa_infected <- merge(shp_usa@data,infected,by.x= "STATE_NAME",by.y="State",all.x = TRUE,sort  = FALSE)

To visualize the map I have this code

    # Plot Cases.y (the counts merged in from `infected`) with quartile breaks.
    # NOTE(review): the row filter still uses slot access (usa_infected@Cases).
    spplot(usa_infected[usa_infected@Cases > 0, ],"Cases.y", at = quantile(usa_infected$Cases.y, p = c(0, .25, .5, .75, 1), na.rm = TRUE), col.regions = brewer.pal(5, "Reds"), main = expression("Cases by State"))

But after run the code I got this message:

Error in `[.data.frame`(usa_infected, usa_infected@Cases > 0, ) : 


 trying to get slot "Cases" from an object (class "data.frame") that is not an S4 object

I have these 2 data sets: 1- This is from a shp file from USA.

 structure(list(STATE_NAME = structure(c(48L, 42L, 51L, 50L, 46L, 
    24L, 38L, 30L, 16L, 22L, 28L, 33L, 39L, 7L, 40L, 31L, 15L, 29L, 
    45L, 5L, 36L, 14L, 9L, 21L, 6L, 18L, 17L, 47L, 26L, 3L, 37L, 
    34L, 43L, 44L, 25L, 11L, 41L, 4L, 19L, 10L, 23L, 12L, 1L, 27L, 
    20L, 35L, 8L, 13L, 2L, 49L, 32L), .Label = c("Alabama", "Alaska", 
    "Arizona", "Arkansas", "California", "Colorado", "Connecticut", 
    "Delaware", "District of Columbia", "Florida", "Georgia", "Hawaii", 
    "Idaho", "Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", 
    "Louisiana", "Maine", "Maryland", "Massachusetts", "Michigan", 
    "Minnesota", "Mississippi", "Missouri", "Montana", "Nebraska", 
    "Nevada", "New Hampshire", "New Jersey", "New Mexico", "New York", 
    "North Carolina", "North Dakota", "Ohio", "Oklahoma", "Oregon", 
    "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota", 
    "Tennessee", "Texas", "Utah", "Vermont", "Virginia", "Washington", 
    "West Virginia", "Wisconsin", "Wyoming"), class = "factor"), 
        STATE_FIPS = structure(c(48L, 42L, 51L, 50L, 46L, 24L, 38L, 
        30L, 16L, 22L, 28L, 33L, 39L, 7L, 40L, 31L, 15L, 29L, 45L, 
        5L, 36L, 14L, 9L, 21L, 6L, 18L, 17L, 47L, 26L, 3L, 37L, 34L, 
        43L, 44L, 25L, 11L, 41L, 4L, 19L, 10L, 23L, 12L, 1L, 27L, 
        20L, 35L, 8L, 13L, 2L, 49L, 32L), .Label = c("01", "02", 
        "04", "05", "06", "08", "09", "10", "11", "12", "13", "15", 
        "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", 
        "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", 
        "36", "37", "38", "39", "40", "41", "42", "44", "45", "46", 
        "47", "48", "49", "50", "51", "53", "54", "55", "56"), class = "factor"), 
        STATE_ABBR = structure(c(48L, 42L, 51L, 49L, 47L, 24L, 38L, 
        31L, 13L, 20L, 30L, 35L, 39L, 7L, 40L, 32L, 16L, 34L, 45L, 
        5L, 36L, 15L, 8L, 21L, 6L, 18L, 17L, 46L, 25L, 4L, 37L, 28L, 
        43L, 44L, 26L, 11L, 41L, 3L, 19L, 10L, 23L, 12L, 2L, 27L, 
        22L, 29L, 9L, 14L, 1L, 50L, 33L), .Label = c("AK", "AL", 
        "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA", "HI", 
        "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME", 
        "MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", 
        "NM", "NV", "NY", "OH", "OK", "OR", "PA", "RI", "SC", "SD", 
        "TN", "TX", "UT", "VA", "VT", "WA", "WI", "WV", "WY"), class = "factor"), 
        Cases = c(364, 8, 1, 6, 1, 5, 21, 5, 13, 95, 10, 216, 16, 
        3, 5, 15, 6, 7, 2, 157, 4, 19, 10, 9, 33, 8, 1, 9, 1, 6, 
        2, 7, 9, 21, 1, 22, 9, 1, 13, 26, 2, 2, NA, NA, NA, NA, NA, 
        NA, NA, NA, NA)), class = "data.frame", row.names = c(NA, 
    -51L))

2- And this is database for infected :

# Dump of the `infected` data set: 42 rows with columns State (character)
# and Cases (numeric), carrying the tibble classes.
structure(list(State = c("Arizona", "Wyoming", "Arkansas", "California", 
"Colorado", "Connecticut", "District of Columbia", "Florida", 
"Georgia", "Hawaii", "Illinois", "Indiana", "Iowa", "Kansas", 
"Kentucky", "Louisiana", "Maryland", "Massachusetts", "Michigan", 
"Minnesota", "Mississippi", "Missouri", "Nebraska", "Nevada", 
"New Hampshire", "New Jersey", "New York", "North Carolina", 
"Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", 
"South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", 
"Vermont", "Virginia", "Washington", "Wisconsin"), Cases = c(6, 
1, 1, 157, 33, 3, 10, 26, 22, 2, 19, 6, 13, 1, 8, 13, 9, 95, 
2, 5, 1, 1, 10, 7, 5, 15, 216, 7, 4, 2, 21, 16, 5, 9, 8, 9, 21, 
2, 1, 9, 364, 6)), row.names = c(NA, -42L), class = c("tbl_df", 
"tbl", "data.frame"))

Related post: https://stackoverflow.com/q/60644364/680068 – zx8754 Mar 12 '20 at 07:39 — zx8754, Mar 12 '20 at 07:39

Kempie · Accepted Answer · 2020-03-12T14:45:43.153

1

First of all, readShapeSpatial is deprecated; you should use rgdal::readOGR or sf::st_read instead.

to answer your questions:

1: Your merge keys (by.x = "STATE_NAME", by.y = "State") are correct if you want to assign the new case counts from the Excel file to each state. (There is already a Cases column in the shapefile.) I prefer naming the two tables explicitly with x = and y =, to match by.x and by.y and show more clearly what is happening, such as:

 # Merge onto the spatial object itself rather than its @data slot: sp's
 # merge method returns an object of the same class as x, so the polygons
 # are kept and spplot() can draw the result. Merging shp_usa@data returns
 # a plain data.frame with no geometry.
 # all.x = TRUE keeps states that have no row in `infected` (their merged
 # case count is NA).
 usa_infected <- merge(
   x = shp_usa, y = infected,
   by.x = "STATE_NAME", by.y = "State",
   all.x = TRUE
 )

Note: column names that occur in both tables get a suffix showing which table they came from, i.e. Cases.x (from x) and Cases.y (from y).

2: As the error states ("unable to find an inherited method for function ‘spplot’ for signature ‘"data.frame"’"), spplot is being asked to plot the shapefile's data frame and not the spatial object. This is because you call the data frame with the @data call (usa_infected@data); try changing this to:

# usa_infected must be the Spatial*DataFrame obtained by merging on shp_usa
# itself; a plain data.frame (e.g. from merging shp_usa@data) has no
# spplot() method.
# which() makes the NA handling explicit: states with no merged case count
# (NA in Cases.y) are dropped instead of reaching `[` as NA row indices.
has_cases <- which(usa_infected$Cases.y > 0)

# Colour breaks at the quartiles of the merged case counts.
case_breaks <- quantile(
  usa_infected$Cases.y,
  probs = c(0, 0.25, 0.5, 0.75, 1),
  na.rm = TRUE
)

spplot(
  usa_infected[has_cases, ],
  "Cases.y",
  at = case_breaks,
  col.regions = brewer.pal(5, "Reds"),
  main = expression("Cases by State")
)

edited Mar 12 '20 at 14:45

answered Mar 12 '20 at 07:36

Kempie

263
2
10

Thank you, but let me ask you something. So I need to eliminate @data in all the code? When I did it I got this error message when I tried to run the last line (spplot). This is the error: Error in `[.data.frame`(obj@data, zcol) : undefined columns selected – Jose Montoya Mar 12 '20 at 12:28
I actually have not used spplot myself and it depends on what the package and function requires. obj = spatialdataframe obj@data = dataframe. My guess is that the call seems correct, you just need to add a "," to define the columns/rows you need. https://stackoverflow.com/questions/19205806/undefined-columns-selected-when-subsetting-data-frame – Kempie Mar 12 '20 at 13:21
I've already edited the post. Is that ok? I eliminate @data from the code but keep this: shp_usa@data. I already define the column – Jose Montoya Mar 12 '20 at 13:35
Ah, just saw you updated the post. I think it is because you use usa_infected@Cases. Both your files have a column named Cases and therefore R renames them. Try usa_infected@Cases.y if you need the cases from the Excel file. spplot(usa_infected[usa_infected@Cases > 0, ],"Cases.y", at = quantile(usa_infected$Cases.y, p = c(0, .25, .5, .75, 1), na.rm = TRUE), col.regions = brewer.pal(5, "Reds"), main = expression("Cases by State")) – Kempie Mar 12 '20 at 14:04
So to clarify Cases.x is the Cases column from your shape and Cases.y is the cases from your excel. I think it will be good practice to rename them or drop the one that is not needed. – Kempie Mar 12 '20 at 14:07
Thank you, but I still have the same error message. I'm going to try another way. I've just edited the post – Jose Montoya Mar 12 '20 at 14:15
Ah, this comes from being hasty: the way you access a column of a data frame is with the $ sign and not the @. spplot(usa_infected[usa_infected$Cases.y > 0, ],"Cases.y", at = quantile(usa_infected$Cases.y, p = c(0, .25, .5, .75, 1), na.rm = TRUE), col.regions = brewer.pal(5, "Reds"), main = expression("Cases by State")) – Kempie Mar 12 '20 at 14:43
Ok, now I have this message error: Error in (function (classes, fdef, mtable) : unable to find an inherited method for function ‘spplot’ for signature ‘"data.frame"’ – Jose Montoya Mar 12 '20 at 20:48

Problems with shape files

1 Answers1