I'm facing a problem with reshaping my data but I'm not sure if reshape2 package is the solution. The original data I need to reshape are stored in a peculiar way. They are daily temperature data in csv files, this is how file header look like:
ID,YEAR,MONTH,NAME,ALTITUDE,REGION,LON,LAT,DATUM,TMAX1,TMAX2,......,TMAX31,TMIN1,TMIN2,..........,TMIN31
where TMAX1
stands for the maximum temperature of day 1 in MONTH
. Then, following values are the maximum temperature for all month days. TMIN1
column gives minimum temperature for day 1 and so on until the last column with minimum temperature for last day in the month. If a month has less than 31 days the field is empty.
Short example data file can be found at link
Reformatting is needed to save data in two new files with just four columns (ID
, DATE
, TEMP
, VALIDTEMP
where TEMP
is TMAX
or TMIN
) with station ID, date, temperature (TMAX
or TMIN
) value and validation flag as seen in the figure:
Thinking over my problem I should create a vector with all possible dates (in the original data only year and month are stated, day comes from the data column number/name) and then make some sort of transposing to fit every daily TMAX
/TMIN
data with its corresponding date in this new data frame.. Not sure if this can be accomplished by reshape2.
I made a simple first attempt with reshape2 but this gives TMAX1
and TMAXn
as different variables while they are all temperature data. I want to melt all TMAXn
/TMIN
in a single variable called TMAX
/TMIN
.
I will continue trying to sort out the problem but any help is greatly appreciated
Output of 20 first rows of original data file
> dput(kk)
structure(list(INDICATIVO = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("8008A",
"8036B", "8251E", "8325C", "8433I", "8472B", "8496E", "8520B"
), class = "factor"), ANYO = c(2015L, 2015L, 2015L, 2015L, 2015L,
2015L, 2015L, 2015L, 2015L, 2015L, 2016L, 2016L, 2016L, 2016L,
2016L, 2016L, 2016L, 2016L, 2016L, 2016L), MES = c(3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L), NOMBRE = structure(c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), .Label = c("ALGEMESSI AUMAR",
"ARENOS (C.H. JUCAR)", "BENIDORM (AQUAGEST)", "CAUDIEL-MAS DE NOGUERA",
"EMBALSE DE ALCORA", "LA POBLA DE BENIFASS\xc0-FREDES I.", "VILLARGORDO DEL CABRIEL-CONTRERAS",
"VILLENA"), class = "factor"), ALTITUD = c(486L, 486L, 486L,
486L, 486L, 486L, 486L, 486L, 486L, 486L, 486L, 486L, 486L, 486L,
486L, 486L, 486L, 486L, 486L, 486L), NOM_PROV = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("ALICANTE", "CASTELLON", "VALENCIA"), class = "factor"),
LONGITUD = c(51562L, 51562L, 51562L, 51562L, 51562L, 51562L,
51562L, 51562L, 51562L, 51562L, 51562L, 51562L, 51562L, 51562L,
51562L, 51562L, 51562L, 51562L, 51562L, 51562L), LATITUD = c(383437L,
383437L, 383437L, 383437L, 383437L, 383437L, 383437L, 383437L,
383437L, 383437L, 383437L, 383437L, 383437L, 383437L, 383437L,
383437L, 383437L, 383437L, 383437L, 383437L), DATUM = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = "ETRS89", class = "factor"), TMAX1 = c(230L,
220L, 310L, 280L, 370L, 310L, 330L, 270L, 200L, 180L, 180L,
190L, 170L, 160L, 210L, 290L, 340L, 320L, 300L, 310L), TMAX2 = c(270L,
200L, 310L, 295L, 330L, 330L, 310L, 270L, 160L, 195L, 150L,
200L, 220L, 180L, 225L, 290L, 360L, 330L, 330L, 300L), TMAX3 = c(240L,
220L, 370L, 300L, 370L, 250L, 330L, 230L, 190L, 200L, 170L,
170L, 180L, 190L, 240L, 290L, 340L, 360L, 350L, 290L), TMAX4 = c(230L,
200L, 330L, 300L, 380L, 360L, 290L, 290L, 230L, 200L, 210L,
130L, 220L, 190L, 230L, 300L, 320L, 400L, 390L, 300L), TMAX5 = c(180L,
240L, 290L, 310L, 400L, 360L, 240L, 300L, 220L, 180L, 170L,
140L, 140L, 120L, 170L, 290L, 360L, 330L, 425L, 270L), TMAX6 = c(170L,
150L, 290L, 320L, 390L, 360L, 190L, 280L, 235L, 160L, 110L,
180L, 140L, 180L, 210L, 310L, 340L, 310L, 360L, 330L), TMAX7 = c(250L,
150L, 260L, 310L, 425L, 400L, 220L, 250L, 230L, 160L, 190L,
130L, 90L, 195L, 190L, 310L, 310L, 310L, 340L, 310L), TMAX8 = c(180L,
170L, 290L, 320L, 400L, 325L, 250L, 220L, 200L, 200L, 220L,
160L, 140L, 150L, 180L, 340L, 360L, 340L, 340L, 290L), TMAX9 = c(200L,
180L, 290L, 330L, 340L, 330L, 240L, 230L, 220L, 160L, 200L,
180L, 140L, 200L, 190L, 360L, 350L, 330L, 320L, 270L), TMAX10 = c(195L,
200L, 270L, 310L, 360L, 390L, 320L, 250L, 240L, 135L, 170L,
210L, 120L, 190L, 190L, 350L, 360L, 250L, 320L, 260L), TMAX11 = c(260L,
180L, 310L, 240L, 340L, 350L, 300L, 270L, 230L, 140L, 100L,
210L, 150L, 200L, 210L, 320L, 370L, 290L, 300L, 300L), TMAX12 = c(260L,
190L, 340L, 300L, 355L, 380L, 290L, 270L, 190L, 140L, 140L,
220L, 130L, 230L, 210L, 310L, 370L, 290L, 320L, 230L), TMAX13 = c(185L,
200L, 380L, 290L, 360L, 350L, 320L, 240L, 210L, 170L, 150L,
220L, 150L, 200L, 210L, 375L, 300L, 300L, 300L, 190L), TMAX14 = c(160L,
260L, 415L, 170L, 360L, 320L, 300L, 230L, 200L, 180L, 160L,
170L, 150L, 230L, 240L, 350L, 280L, 310L, 250L, 230L), TMAX15 = c(130L,
230L, 390L, 260L, 360L, 300L, 300L, 250L, 200L, 210L, 120L,
140L, 150L, 240L, 230L, 320L, 270L, 320L, 260L, 240L), TMAX16 = c(150L,
240L, 370L, 270L, 360L, 300L, 330L, 230L, 230L, 215L, 160L,
120L, 180L, 250L, 230L, 270L, 280L, 260L, 290L, 250L), TMAX17 = c(140L,
260L, 380L, 300L, 370L, 350L, 270L, 230L, 230L, 210L, 130L,
130L, 150L, 240L, 220L, 280L, 310L, 375L, 300L, 290L), TMAX18 = c(120L,
250L, 280L, 280L, 350L, 320L, 250L, 240L, 220L, 215L, 170L,
130L, 160L, 220L, 250L, 300L, 320L, 370L, 280L, 230L), TMAX19 = c(140L,
240L, 280L, 320L, 380L, 320L, 265L, 220L, 240L, 220L, 140L,
120L, 150L, 180L, 260L, 280L, 340L, 370L, 300L, 220L), TMAX20 = c(120L,
215L, 230L, 310L, 370L, 330L, 290L, 190L, 260L, 180L, 150L,
160L, 170L, 200L, 250L, 280L, 380L, 390L, 270L, 220L), TMAX21 = c(170L,
200L, 220L, 310L, 370L, 330L, 330L, 200L, 200L, 185L, 140L,
170L, 140L, 230L, 250L, 295L, 360L, 300L, 310L, 200L), TMAX22 = c(140L,
240L, 220L, 400L, 370L, 330L, 340L, 210L, 130L, 155L, 160L,
220L, 130L, 210L, 300L, 310L, 340L, 320L, 310L, 210L), TMAX23 = c(160L,
270L, 240L, 350L, 340L, 340L, 300L, 230L, 100L, 160L, 190L,
180L, 170L, 230L, 240L, 320L, 310L, 330L, 290L, 240L), TMAX24 = c(150L,
250L, 230L, 310L, 390L, 330L, 260L, 190L, 130L, 150L, 190L,
200L, 200L, 240L, 270L, 350L, 310L, 330L, 280L, NA), TMAX25 = c(110L,
260L, 250L, 310L, 350L, 320L, 280L, 190L, 180L, 160L, 190L,
160L, 230L, 200L, 300L, 330L, 310L, 330L, 290L, NA), TMAX26 = c(150L,
260L, 290L, 335L, 340L, 320L, 280L, 190L, 190L, 160L, 140L,
160L, 240L, 250L, 280L, 300L, 325L, 310L, 280L, NA), TMAX27 = c(230L,
220L, 260L, 360L, 420L, 330L, 260L, 230L, 160L, 160L, 130L,
85L, 210L, 250L, 300L, 320L, 340L, 340L, 270L, NA), TMAX28 = c(260L,
250L, 260L, 390L, 350L, 350L, 230L, 200L, 190L, 160L, 170L,
150L, 170L, 220L, 300L, 320L, 330L, 360L, 240L, NA), TMAX29 = c(260L,
250L, 300L, 370L, 390L, 330L, 210L, 230L, 180L, 180L, 130L,
150L, 250L, 130L, 260L, 330L, 350L, 350L, 260L, NA), TMAX30 = c(280L,
260L, 300L, 370L, 340L, 315L, 230L, 220L, 230L, 180L, 170L,
NA, 240L, 240L, 290L, 350L, 380L, 310L, 270L, NA), TMAX31 = c(310L,
NA, 290L, NA, 330L, 340L, NA, 200L, NA, 210L, 210L, NA, 175L,
NA, 270L, NA, 400L, 310L, NA, NA), TMIN1 = c(70L, 60L, 70L,
130L, 160L, 210L, 210L, 130L, 90L, -50L, 50L, 20L, 20L, 40L,
90L, 90L, 170L, 240L, 155L, 110L), TMIN2 = c(130L, 20L, 140L,
130L, 200L, 210L, 190L, 90L, 60L, -15L, 30L, -10L, 40L, -10L,
10L, 100L, 140L, 150L, 150L, 130L), TMIN3 = c(70L, 30L, 90L,
100L, 210L, 190L, 165L, 100L, 60L, -10L, 35L, 10L, 80L, 30L,
30L, 130L, 210L, 140L, 130L, 110L), TMIN4 = c(70L, 80L, 150L,
90L, 190L, 180L, 200L, 120L, 50L, 0L, 120L, -10L, -10L, 70L,
30L, 130L, 200L, 160L, 130L, 110L), TMIN5 = c(-20L, 100L,
150L, 100L, 150L, 220L, 170L, 150L, 90L, -10L, 70L, 20L,
60L, 80L, 55L, 120L, 180L, 210L, 160L, 150L), TMIN6 = c(-30L,
55L, 135L, 80L, 150L, 230L, 170L, 130L, 90L, 70L, 50L, 10L,
35L, 100L, 70L, 110L, 190L, 170L, 190L, 120L), TMIN7 = c(-30L,
80L, 70L, 70L, 150L, 240L, 170L, 120L, 100L, 80L, 40L, 65L,
-20L, 75L, 90L, 130L, 160L, 130L, 140L, 130L), TMIN8 = c(30L,
50L, 70L, 100L, 180L, 200L, 150L, 120L, 90L, 40L, 10L, 95L,
20L, 50L, 90L, 125L, 170L, 150L, 150L, 120L), TMIN9 = c(50L,
80L, 110L, 110L, 210L, 210L, 125L, 100L, 80L, 30L, 85L, 60L,
-20L, 0L, 110L, 140L, 210L, 130L, 160L, 140L), TMIN10 = c(-10L,
70L, 70L, 80L, 180L, 230L, 180L, 100L, 30L, 30L, 50L, 130L,
40L, 20L, 80L, 160L, 160L, 180L, 150L, 120L), TMIN11 = c(-10L,
80L, 50L, 110L, 160L, 220L, 170L, 130L, 25L, 30L, 50L, 120L,
-20L, 50L, 100L, 180L, 170L, 140L, 210L, 150L), TMIN12 = c(20L,
90L, 60L, 150L, 180L, 210L, 140L, 120L, 25L, 30L, 20L, 90L,
-10L, 50L, 75L, 160L, 180L, 140L, 140L, 100L), TMIN13 = c(20L,
110L, 80L, 150L, 160L, 200L, 150L, 140L, 100L, 20L, -15L,
160L, -30L, 60L, 100L, 150L, 200L, 105L, 140L, 120L), TMIN14 = c(20L,
70L, 120L, 130L, 160L, 190L, 180L, 130L, 100L, 0L, -15L,
70L, -10L, 40L, 110L, 220L, 200L, 120L, 140L, 90L), TMIN15 = c(-5L,
115L, 110L, 140L, 170L, 180L, 125L, 40L, 60L, 0L, 40L, 60L,
40L, 50L, 110L, 170L, 180L, 120L, 80L, 60L), TMIN16 = c(50L,
100L, 60L, 100L, 155L, 160L, 120L, 115L, 50L, 20L, -30L,
55L, 10L, 50L, 80L, 170L, 150L, 160L, 100L, 50L), TMIN17 = c(-10L,
80L, 60L, 110L, 170L, 210L, 140L, 90L, 40L, 5L, -60L, -80L,
5L, 100L, 50L, 120L, 100L, 155L, 90L, 110L), TMIN18 = c(70L,
50L, 50L, 120L, 170L, 205L, 120L, 100L, 0L, 0L, 70L, 0L,
90L, 80L, 60L, 100L, 130L, 185L, 110L, 160L), TMIN19 = c(100L,
90L, 160L, 120L, 180L, 220L, 140L, 100L, 30L, -10L, 50L,
70L, 100L, 110L, 80L, 100L, 130L, 180L, 100L, 160L), TMIN20 = c(110L,
60L, 130L, 120L, 190L, 200L, 100L, 140L, 50L, -30L, -15L,
-15L, 50L, 110L, 80L, 110L, 180L, 200L, 160L, 130L), TMIN21 = c(90L,
40L, 80L, 130L, 175L, 175L, 120L, 90L, 50L, -10L, 0L, 40L,
50L, 80L, 80L, 100L, 170L, 220L, 140L, 130L), TMIN22 = c(100L,
100L, 60L, 130L, 195L, 175L, 120L, 70L, -10L, 0L, 25L, 60L,
80L, 80L, 80L, 100L, 150L, 170L, 130L, 110L), TMIN23 = c(70L,
70L, 50L, 150L, 190L, 170L, 130L, 50L, -10L, 20L, 15L, 20L,
85L, 50L, 120L, 110L, 180L, 150L, 130L, 150L), TMIN24 = c(80L,
60L, 60L, 140L, 200L, 210L, 100L, 90L, -45L, 0L, 20L, 20L,
25L, 80L, 100L, 110L, 140L, 135L, 125L, 130L), TMIN25 = c(30L,
110L, 65L, 150L, 230L, 150L, 90L, 130L, -20L, -20L, 70L,
90L, 20L, 40L, 110L, 125L, 140L, 130L, 135L, NA), TMIN26 = c(10L,
110L, 80L, 150L, 230L, 150L, 90L, 130L, 100L, 20L, 40L, 20L,
115L, 40L, 130L, 170L, 140L, 145L, 185L, NA), TMIN27 = c(70L,
100L, 100L, 150L, 200L, 140L, 130L, 120L, 10L, 35L, 40L,
40L, 70L, 70L, 120L, 170L, 150L, 140L, 160L, NA), TMIN28 = c(70L,
90L, 90L, 150L, 200L, 140L, 110L, 70L, 10L, -20L, 25L, 40L,
90L, 120L, 150L, 170L, 160L, 160L, 170L, NA), TMIN29 = c(60L,
60L, 90L, 150L, 200L, 160L, 100L, 80L, -30L, 50L, 60L, 60L,
75L, 110L, 150L, 200L, 140L, 200L, 120L, NA), TMIN30 = c(90L,
60L, 70L, 150L, 210L, 170L, 130L, 65L, -10L, 0L, 60L, NA,
10L, 110L, 140L, 180L, 170L, 220L, 130L, NA), TMIN31 = c(110L,
NA, 130L, NA, 210L, 220L, NA, 100L, NA, 70L, 80L, NA, 65L,
NA, 120L, NA, 210L, 170L, NA, NA)), .Names = c("INDICATIVO",
"ANYO", "MES", "NOMBRE", "ALTITUD", "NOM_PROV", "LONGITUD", "LATITUD",
"DATUM", "TMAX1", "TMAX2", "TMAX3", "TMAX4", "TMAX5", "TMAX6",
"TMAX7", "TMAX8", "TMAX9", "TMAX10", "TMAX11", "TMAX12", "TMAX13",
"TMAX14", "TMAX15", "TMAX16", "TMAX17", "TMAX18", "TMAX19", "TMAX20",
"TMAX21", "TMAX22", "TMAX23", "TMAX24", "TMAX25", "TMAX26", "TMAX27",
"TMAX28", "TMAX29", "TMAX30", "TMAX31", "TMIN1", "TMIN2", "TMIN3",
"TMIN4", "TMIN5", "TMIN6", "TMIN7", "TMIN8", "TMIN9", "TMIN10",
"TMIN11", "TMIN12", "TMIN13", "TMIN14", "TMIN15", "TMIN16", "TMIN17",
"TMIN18", "TMIN19", "TMIN20", "TMIN21", "TMIN22", "TMIN23", "TMIN24",
"TMIN25", "TMIN26", "TMIN27", "TMIN28", "TMIN29", "TMIN30", "TMIN31"
), row.names = 786:805, class = "data.frame")