-1

This is my first data set.

# A tibble: 10 x 6
   start_location_Long start_location_Lat end_location_Long end_location_Lat length clusterNum
                 <dbl>              <dbl>             <dbl>            <dbl>  <dbl>      <dbl>
 1                101.               13.8              101.             13.8  1363.          1
 2                101.               13.8              101.             13.8  1224.          1
 3                101.               13.8              101.             13.8  1045.          1
 4                101.               13.8              101.             13.8   106.          1
 5                101.               13.8              101.             13.8  5037.          1
 6                101.               13.8              101.             13.8  3682.          2
 7                101.               13.8              101.             13.8   866.          2
 8                101.               13.8              101.             13.8  1764.          6
 9                101.               13.8              101.             13.8  1950.          6
10                101.               13.8              101.             13.8   931.          6
# First data set (dput output): 10 rows of start/end coordinates with a
# length and a clusterNum column; stored as a tibble grouped by clusterNum.
df1 <- structure(
  list(
    start_location_Long = c(
      100.56348, 100.54894, 100.56529, 100.54631, 100.57274,
      100.54883, 100.53564, 100.55054, 100.55533, 100.55743
    ),
    start_location_Lat = c(
      13.79793, 13.81345, 13.75772, 13.81105, 13.77156,
      13.8129, 13.80799, 13.79688, 13.78796, 13.76681
    ),
    end_location_Long = c(
      100.56723, 100.55415, 100.56994, 100.54612, 100.56129,
      100.53425, 100.53375, 100.54621, 100.57325, 100.5611
    ),
    end_location_Lat = c(
      13.80969, 13.80363, 13.766, 13.81199, 13.81569,
      13.78282, 13.80038, 13.81225, 13.78991, 13.77442
    ),
    length = c(
      1362.81284624, 1223.81637204, 1045.02327624, 106.008861913,
      5036.9607016, 3682.49608165, 866.396979262, 1763.77103489,
      1949.69621816, 930.799744269
    ),
    clusterNum = c(1, 1, 1, 1, 1, 2, 2, 6, 6, 6)
  ),
  row.names = c(NA, -10L),
  # Grouping metadata: one entry per clusterNum value with its row indices.
  groups = structure(
    list(
      clusterNum = c(1, 2, 6),
      .rows = structure(
        list(1:5, 6:7, 8:10),
        ptype = integer(0),
        class = c("vctrs_list_of", "vctrs_vctr", "list")
      )
    ),
    row.names = c(NA, 3L),
    class = c("tbl_df", "tbl", "data.frame"),
    .drop = TRUE
  ),
  class = c("grouped_df", "tbl_df", "tbl", "data.frame")
)

And this is the second data set, which I would like to join to the first.

# A tibble: 10 x 12
   start_location_~ start_location_~ seg_2_lat seg_2_lon seg_3_lat seg_3_lon seg_4_lat seg_4_lon seg_5_lat seg_5_lon end_location_Lat
              <dbl>            <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>            <dbl>
 1             13.8             101.      13.7      101.      13.6      101.      13.7      101.      13.7      101.             13.8
 2             13.7             101.      13.7      101.      13.7      101.      13.7      101.      13.7      101.             13.7
 3             13.6             101.      13.6      101.      13.6      101.      13.6      101.      13.6      101.             13.6
 4             13.6             101.      13.6      101.      13.6      101.      13.6      101.      13.6      101.             13.6
 5             13.6             101.      13.6      101.      13.6      101.      13.6      101.      13.6      101.             13.6
 6             13.6             101.      13.6      101.      13.6      101.      13.6      101.      13.6      101.             13.6
 7             13.6             101.      13.6      101.      13.6      101.      13.6      101.      13.6      101.             13.6
 8             13.9             101.      13.9      101.      13.9      101.      13.9      101.      13.9      101.             13.9
 9             13.9             101.      13.9      101.      13.9      101.      13.9      101.      13.9      101.             13.9
10             13.9             101.      13.9      101.      13.9      101.      13.9      101.      13.9      101.             13.9
# ... with 1 more variable: end_location_Long <dbl>
# Second data set (dput output): 10 rows holding the same four start/end
# coordinate columns plus intermediate seg_2 .. seg_5 lat/lon columns.
df2 <- structure(
  list(
    start_location_Lat = c(
      13.79793, 13.66859, 13.63345, 13.60629, 13.56958,
      13.60148, 13.60283, 13.93821, 13.92455, 13.88824
    ),
    start_location_Long = c(
      100.56348, 100.72032, 100.71102, 100.69999, 100.78749,
      100.71103, 100.70543, 100.51485, 100.59617, 100.60413
    ),
    seg_2_lat = c(
      13.65382, 13.66353, 13.63349, 13.61857, 13.57759,
      13.60519, 13.60118, 13.92443, 13.94145, 13.89331
    ),
    seg_2_lon = c(
      100.67562, 100.71894, 100.71096, 100.70363, 100.78828,
      100.7129, 100.71196, 100.52361, 100.61323, 100.60713
    ),
    seg_3_lat = c(
      13.63679, 13.66353, 13.63349, 13.6344, 13.58393,
      13.61076, 13.60193, 13.92633, 13.93945, 13.88969
    ),
    seg_3_lon = c(
      100.71057, 100.71894, 100.71096, 100.71093, 100.79077,
      100.71317, 100.71915, 100.52603, 100.64207, 100.60779
    ),
    seg_4_lat = c(
      13.65828, 13.66353, 13.63349, 13.6344, 13.58385,
      13.62504, 13.60487, 13.93948, 13.93277, 13.88969
    ),
    seg_4_lon = c(
      100.71631, 100.71894, 100.71096, 100.71093, 100.79403,
      100.71832, 100.71483, 100.54145, 100.68214, 100.60779
    ),
    seg_5_lat = c(
      13.65828, 13.65958, 13.63349, 13.6344, 13.59328,
      13.63082, 13.605, 13.93382, 13.93277, 13.88728
    ),
    seg_5_lon = c(
      100.71631, 100.71773, 100.71096, 100.71093, 100.76465,
      100.71459, 100.71402, 100.56506, 100.68214, 100.61401
    ),
    end_location_Lat = c(
      13.80969, 13.65258, 13.63349, 13.63433, 13.59612,
      13.63241, 13.60567, 13.92856, 13.92057, 13.88462
    ),
    end_location_Long = c(
      100.56723, 100.71571, 100.71096, 100.71101, 100.74922,
      100.71297, 100.71292, 100.58192, 100.68588, 100.62013
    )
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)

df1 and df2 have the same values in four columns: start_location_Long, start_location_Lat, end_location_Long and end_location_Lat. I want to merge df1 and df2 on these columns so that the output is df1 with the extra columns from df2 (a left outer join?).

My expected result would have the following columns:

start_location_Long start_location_Lat end_location_Long end_location_Lat length clusterNum seg_2_lat seg_2_lon seg_3_lat seg_3_lon seg_4_lat seg_4_lon seg_5_lat seg_5_lon

Thank you in advance for any help.

Yasumin
  • 443
  • 2
  • 8
  • You may want to check : https://stackoverflow.com/questions/1299871/how-to-join-merge-data-frames-inner-outer-left-right – Ronak Shah Jun 29 '21 at 11:03

1 Answer

2

You can do:

library(dplyr)

# Left join: every row of df1 is kept, and the columns of df2 are attached
# wherever all four coordinate columns match (unmatched rows get NA).
df_out <- df1 %>%
  left_join(
    df2,
    by = c(
      "start_location_Long",
      "start_location_Lat",
      "end_location_Long",
      "end_location_Lat"
    )
  )
MonJeanJean
  • 2,876
  • 1
  • 4
  • 20