0

if I have three time-series tables like:

df1 <- read.table(text = " Date V1 V2
2000-01-07 5 1
2000-01-08 1 4
2000-01-09 4 3
2000-01-10 0 0", 
              header = TRUE, stringsAsFactors = FALSE)

df2 <- read.table(text = " Date V1 V2
2000-01-01 1 1
2000-01-02 0 0
2000-01-03 4 6
2000-01-04 6 5
2000-01-05 3 0
2000-01-06 3 0
2000-01-07 7 4
2000-01-08 9 0
2000-01-09 0 0
2000-01-10 0 0", 
              header = TRUE, stringsAsFactors = FALSE)

df3 <- read.table(text = " Date V1 V2
2000-01-01 4 3
2000-01-02 4 0
2000-01-03 6 1
2000-01-04 7 5", 
              header = TRUE, stringsAsFactors = FALSE)

how could I create a table df4 which contains the oldest dates among the tables. then it organizes the seconds column of each table and finally the thirds columns of each table. Note that if the date doesn't exist.. if fills columns with NA.

df4 <- read.table(text = " Date df1_V1 df2_V1 df3_V1 df1_V2 df2_V2  df3_V2
2000-01-01 NA 1 4 NA 1 3
2000-01-02 NA 0 4 NA 0 0
2000-01-03 NA 4 6 NA 6 1
2000-01-04 NA 6 7 NA 5 5
2000-01-05 NA 3 NA NA 0 NA
2000-01-06 NA 3 NA NA 0 NA
2000-01-07 5 7 NA 1 4 NA
2000-01-08 1 9 NA 4 0 NA
2000-01-09 4 0 NA 3 0 NA
2000-01-10 0 0 NA 0 0 NA", 
              header = TRUE, stringsAsFactors = FALSE)
  • Try `Reduce(function(...)merge(..., by = 'Date', all = TRUE), mget(ls(pattern = 'df[0-9]+')))` – Sotos Feb 20 '18 at 15:06

2 Answers2

0

You can do it just by creating the date vector and merging your dataframes in:

df1 <- read.table(text = " Date V1 V2
2000-01-07 5 1
2000-01-08 1 4
2000-01-09 4 3
2000-01-10 0 0", 
                  header = TRUE, stringsAsFactors = FALSE)

df2 <- read.table(text = " Date V1 V2
2000-01-01 1 1
2000-01-02 0 0
2000-01-03 4 6
2000-01-04 6 5
2000-01-05 3 0
2000-01-06 3 0
2000-01-07 7 4
2000-01-08 9 0
2000-01-09 0 0
2000-01-10 0 0", 
                  header = TRUE, stringsAsFactors = FALSE)

df3 <- read.table(text = " Date V1 V2
2000-01-01 4 3
2000-01-02 4 0
2000-01-03 6 1
2000-01-04 7 5", 
                  header = TRUE, stringsAsFactors = FALSE)

df1$Date <- as.Date(df1$Date)
df2$Date <- as.Date(df2$Date)
df3$Date <- as.Date(df3$Date)

# Make a vector of all dates between the lowest and highest
tdate <- seq(from=as.Date(min(df1$Date, df2$Date, df3$Date)), to=as.Date(max(df1$Date, df2$Date, df3$Date)), by = 1)

# Merge the dataframes to this vector of dates
df4 <- data.frame(Date = tdate)
df4 <- merge(df4, df1, all.x=T, by="Date")
df4 <- merge(df4, df2, all.x=T, by="Date")
df4 <- merge(df4, df3, all.x=T, by="Date")
df4
#>          Date V1.x V2.x V1.y V2.y V1 V2
#> 1  2000-01-01   NA   NA    1    1  4  3
#> 2  2000-01-02   NA   NA    0    0  4  0
#> 3  2000-01-03   NA   NA    4    6  6  1
#> 4  2000-01-04   NA   NA    6    5  7  5
#> 5  2000-01-05   NA   NA    3    0 NA NA
#> 6  2000-01-06   NA   NA    3    0 NA NA
#> 7  2000-01-07    5    1    7    4 NA NA
#> 8  2000-01-08    1    4    9    0 NA NA
#> 9  2000-01-09    4    3    0    0 NA NA
#> 10 2000-01-10    0    0    0    0 NA NA
gfgm
  • 3,627
  • 14
  • 34
0

This can be achieved easily using dplyr.


df1 <- read.table(text = " Date V1 V2
2000-01-07 5 1
                  2000-01-08 1 4
                  2000-01-09 4 3
                  2000-01-10 0 0", 
                  header = TRUE, stringsAsFactors = FALSE)

df2 <- read.table(text = " Date V1 V2
                  2000-01-01 1 1
                  2000-01-02 0 0
                  2000-01-03 4 6
                  2000-01-04 6 5
                  2000-01-05 3 0
                  2000-01-06 3 0
                  2000-01-07 7 4
                  2000-01-08 9 0
                  2000-01-09 0 0
                  2000-01-10 0 0", 
                  header = TRUE, stringsAsFactors = FALSE)

df3 <- read.table(text = " Date V1 V2
                  2000-01-01 4 3
                  2000-01-02 4 0
                  2000-01-03 6 1
                  2000-01-04 7 5", 
                  header = TRUE, stringsAsFactors = FALSE)

df4 <- read.table(text = " Date df1_V1 df2_V1 df3_V1 df1_V2 df2_V2  df3_V2
                  2000-01-01 NA 1 4 NA 1 3
                  2000-01-02 NA 0 4 NA 0 0
                  2000-01-03 NA 4 6 NA 6 1
                  2000-01-04 NA 6 7 NA 5 5
                  2000-01-05 NA 3 NA NA 0 NA
                  2000-01-06 NA 3 NA NA 0 NA
                  2000-01-07 5 7 NA 1 4 NA
                  2000-01-08 1 9 NA 4 0 NA
                  2000-01-09 4 0 NA 3 0 NA
                  2000-01-10 0 0 NA 0 0 NA", 
                  header = TRUE, stringsAsFactors = FALSE)

library(dplyr)

# We can use `full_join` to merge the dataframes 
# (`full_join` means that all records from all of the dataframe 
# are represented in the new dataframe in fills NA values 
# where there is no matching date). by = c("Date" = "Date")
# tells dplyr that we want to join only on the Date column
df5 <- df1 %>% 
  full_join(df2, by = c("Date" = "Date")) %>% 
  full_join(df3, by = c("Date" = "Date"))

# next we use the arrange function to sort the dataframe
# by increasing Date
df5 <- df5 %>%
  arrange(Date) 

df5
#>          Date V1.x V2.x V1.y V2.y V1 V2
#> 1  2000-01-01   NA   NA    1    1  4  3
#> 2  2000-01-02   NA   NA    0    0  4  0
#> 3  2000-01-03   NA   NA    4    6  6  1
#> 4  2000-01-04   NA   NA    6    5  7  5
#> 5  2000-01-05   NA   NA    3    0 NA NA
#> 6  2000-01-06   NA   NA    3    0 NA NA
#> 7  2000-01-07    5    1    7    4 NA NA
#> 8  2000-01-08    1    4    9    0 NA NA
#> 9  2000-01-09    4    3    0    0 NA NA
#> 10 2000-01-10    0    0    0    0 NA NA

# Finally, we rename and reorder the columns to match your
# example answer, df4
df5 <- df5 %>%
  select(
    Date, 
    df1_V1 = V1.x,
    df2_V1 = V1.y,
    df3_V1 = V1,
    df1_V2 = V2.x,
    df2_V2 = V2.y,
    df3_V2 = V2)

df5
#>          Date df1_V1 df2_V1 df3_V1 df1_V2 df2_V2 df3_V2
#> 1  2000-01-01     NA      1      4     NA      1      3
#> 2  2000-01-02     NA      0      4     NA      0      0
#> 3  2000-01-03     NA      4      6     NA      6      1
#> 4  2000-01-04     NA      6      7     NA      5      5
#> 5  2000-01-05     NA      3     NA     NA      0     NA
#> 6  2000-01-06     NA      3     NA     NA      0     NA
#> 7  2000-01-07      5      7     NA      1      4     NA
#> 8  2000-01-08      1      9     NA      4      0     NA
#> 9  2000-01-09      4      0     NA      3      0     NA
#> 10 2000-01-10      0      0     NA      0      0     NA

all.equal(df5, df4)
#> [1] TRUE
Eumenedies
  • 1,618
  • 9
  • 13