0

I have a dataframe which gives me months labelled as M1-M12 instead of Jan-Dec. I am trying to convert the M values to month abbreviations and I can't seem to work it out. This is the dput for the original dataframe:

# Original example data (dput output): 36 rows sharing a single
# Longitude/Latitude pair, with month stored as the strings "M1".."M12"
# (three consecutive rows per month) and year cycling 2016, 2017, 2018.
mapoc_temp = structure(list(Longitude = c(-43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961), Latitude = c(59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291), Temp = c(-1.1657087802887, 
-1.70908033847809, -1.70908033847809, -1.64846479892731, -1.50903105735779, 
-1.50903105735779, -1.29481840133667, -0.819319725036621, -0.819319725036621, 
0.937921285629272, -0.033661849796772, -0.033661849796772, 3.09912943840027, 
3.3768904209137, 3.3768904209137, 5.44990491867065, 5.90848398208618, 
5.90848398208618, 8.87255096435547, 7.57381582260132, 7.57381582260132, 
9.52607250213623, 9.41888046264648, 9.41888046264648, 7.80030059814453, 
7.23698377609253, 7.23698377609253, 3.53716945648193, 4.55290651321411, 
4.55290651321411, 0.885161995887756, 1.48482501506805, 1.48482501506805, 
-0.0936287492513657, 0.650709450244904, 0.650709450244904), month = c("M1", 
"M1", "M1", "M2", "M2", "M2", "M3", "M3", "M3", "M4", "M4", "M4", 
"M5", "M5", "M5", "M6", "M6", "M6", "M7", "M7", "M7", "M8", "M8", 
"M8", "M9", "M9", "M9", "M10", "M10", "M10", "M11", "M11", "M11", 
"M12", "M12", "M12"), year = c(2016, 2017, 2018, 2016, 2017, 
2018, 2016, 2017, 2018, 2016, 2017, 2018, 2016, 2017, 2018, 2016, 
2017, 2018, 2016, 2017, 2018, 2016, 2017, 2018, 2016, 2017, 2018, 
2016, 2017, 2018, 2016, 2017, 2018, 2016, 2017, 2018)), row.names = c(NA, 
-36L), class = "data.frame")

I've tried using the following code to change the M strings to months:

# Intended: rename the months so they are abbreviated (Jan-Dec) instead of
# M1-M12.
# NOTE(review): this assigns the 12-element named vector itself to the column.
# The names (M1, M2, ...) are not used as a lookup against the existing
# values; the 12 abbreviations are simply recycled down the 36 rows, so the
# column ends up as Jan, Feb, ..., Dec repeated three times.
mapoc_temp$month = c(M1 = "Jan", M2 = "Feb", M3 = "Mar",
                     M4 = "Apr", M5 = "May", M6 = "Jun",
                     M7 = "Jul", M8 = "Aug", M9 = "Sep",
                     M10 = "Oct", M11 = "Nov", M12 = "Dec")

but when I use that code it gives me different months than my original ones for each of the rows, as you can see in this new dataframe I've posted:

# The data frame after the month assignment was run (dput output): month now
# cycles "Jan".."Dec" row by row, so it no longer lines up with the
# three-rows-per-month layout that the year column (2016, 2017, 2018
# repeating) still follows.
mapoc_temp = structure(list(Longitude = c(-43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961, -43.5411605834961, -43.5411605834961, 
-43.5411605834961, -43.5411605834961), Latitude = c(59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291, 59.950626373291, 
59.950626373291, 59.950626373291, 59.950626373291), Temp = c(-1.1657087802887, 
-1.70908033847809, -1.70908033847809, -1.64846479892731, -1.50903105735779, 
-1.50903105735779, -1.29481840133667, -0.819319725036621, -0.819319725036621, 
0.937921285629272, -0.033661849796772, -0.033661849796772, 3.09912943840027, 
3.3768904209137, 3.3768904209137, 5.44990491867065, 5.90848398208618, 
5.90848398208618, 8.87255096435547, 7.57381582260132, 7.57381582260132, 
9.52607250213623, 9.41888046264648, 9.41888046264648, 7.80030059814453, 
7.23698377609253, 7.23698377609253, 3.53716945648193, 4.55290651321411, 
4.55290651321411, 0.885161995887756, 1.48482501506805, 1.48482501506805, 
-0.0936287492513657, 0.650709450244904, 0.650709450244904), month = c("Jan", 
"Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", 
"Nov", "Dec", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", 
"Aug", "Sep", "Oct", "Nov", "Dec", "Jan", "Feb", "Mar", "Apr", 
"May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"), year = c(2016, 
2017, 2018, 2016, 2017, 2018, 2016, 2017, 2018, 2016, 2017, 2018, 
2016, 2017, 2018, 2016, 2017, 2018, 2016, 2017, 2018, 2016, 2017, 
2018, 2016, 2017, 2018, 2016, 2017, 2018, 2016, 2017, 2018, 2016, 
2017, 2018)), row.names = c(NA, -36L), class = "data.frame")

As you can see, instead of assigning M1 to Jan, M2 to Feb, M3 to Mar, etc., it gives me the abbreviated months in order no matter what the original value is. Does anyone know how to fix this?

Kristen Cyr
  • 629
  • 5
  • 16
  • Overwriting month with your vector is [still wrong](https://stackoverflow.com/questions/60287389/how-to-change-strings-to-dates-in-r-when-there-is-no-date-format-for-the-values#comment106642685_60287389). – r2evans Feb 18 '20 at 20:34
  • 2
    @Annet's suggestion is spot on. But here's why your code isn't doing what you expect. The code `mapoc_temp$month = c(M1 = "Jan", M2 = "Feb"...` literally tells R to replace the "month" column with the specific sequence ("Jan"-"Feb"-...), erasing whatever was there before. Because R makes use of "vector recycling" and the column that's being replaced has more than 12 values, the 12-month sequence repeats itself as long as it has to in order to fill out the entire column you're replacing. – bschneidr Feb 18 '20 at 20:39
  • @bschneidr it is easy enough to change month_new to month (I also added that to the text). However, it should not give vectors in the environment, which is what Kristen Cyr is stating happens. – Annet Feb 18 '20 at 20:44
  • 1
    Sorry if I was unclear, @Annet. When I said "your code" in my earlier comment, I was addressing Kristen. There's no reason your (Annet's) code should create a new vector in the environment. I'm confident there's just something weird going on in Kristen's workspace that maybe needs to get cleaned up with a fresh R session or `rm(list=ls())`. – bschneidr Feb 18 '20 at 20:48

1 Answer

6

using dplyr you might try this:

# install.packages("dplyr")  # one-time setup; run only if dplyr is missing
library(dplyr)

# Convert "M1".."M12" to "Jan".."Dec": strip the leading "M", turn the
# remaining digits into an integer, and use it to index base R's month.abb.
# The result is written to a new column (month_new) so it can be compared
# against the original month column.
mapoc_temp <- mapoc_temp %>%
  mutate(month_new = month.abb[as.integer(gsub("^M", "", month))])

The %>% is the pipe operator. month.abb is a built-in constant from base R that contains the three-letter abbreviations of the English month names (it is always English, regardless of the language your R session is in; if you need localized abbreviations, you can use format() with "%b" on a Date instead). By using gsub I delete the M that is in front of the numbers. These numbers are used to select the correct month in month.abb.

It results in:

   Longitude Latitude       Temp month year month_new
1  -43.54116 59.95063 -1.1657088    M1 2016       Jan
2  -43.54116 59.95063 -1.7090803    M1 2017       Jan
3  -43.54116 59.95063 -1.7090803    M1 2018       Jan
4  -43.54116 59.95063 -1.6484648    M2 2016       Feb
5  -43.54116 59.95063 -1.5090311    M2 2017       Feb
6  -43.54116 59.95063 -1.5090311    M2 2018       Feb
7  -43.54116 59.95063 -1.2948184    M3 2016       Mar
8  -43.54116 59.95063 -0.8193197    M3 2017       Mar
9  -43.54116 59.95063 -0.8193197    M3 2018       Mar
10 -43.54116 59.95063  0.9379213    M4 2016       Apr

If you don't want it in a new variable, just change month_new to month. I used a new variable here only to show that the M# values are converted correctly.

Annet
  • 846
  • 3
  • 14
  • I have dplyr. I don't get an error message. Instead of mutating another column it gives me values in my environment where M1 = Jan, etc. – Kristen Cyr Feb 18 '20 at 20:38
  • KristenCyr, there is nothing in that code that will place any other variables in your global environment. If you see `M1` et al there, then is there anything else that might have placed it there some time ago? – r2evans Feb 18 '20 at 20:45
  • 1
    @Annet, please be *very* careful when recommending that command ... it is irreversible, and new users may not realize until too late that all of their efforts need to be redone. It might be trivial for some, but it can be cataclysmic for many. (`format c:` anyone?) – r2evans Feb 18 '20 at 21:03
  • @r2evans fair enough. I will remove it and think of it in the future. – Annet Feb 19 '20 at 07:19