0

I have a data frame and I need to delete duplicated data. My main problem is that some data is duplicated but the text is not exactly the same.

ID NAME
234 ALEJANDRA AZYADtETH ONTIVEROS CABALLERO
235 ALEJANDRA CABALLERO
236 ALEJANDRA ONTIVEROS CABALLERO
237 10119715 - TIUSA
238 10119715 -TIUSA
239 10118580 - BAEZ
240 10118580 – BAEZ

I want to get a more homogeneous data frame. I have already used duplicated(), so the rows that are exactly the same are gone.

Here are two samples of the data.

carrier_data1 <-
  structure(list(
    Id = c(
      2784L,
      2639L,
      6499L,
      6917L,
      6922L,
      1060L,
      6920L,
      13847L,
      5792L,
      5705L,
      4229L,
      7196L,
      6916L,
      5767L,
      5675L,
      5644L,
      9808L,
      5756L,
      7669L,
      3030L
    ),
    Name = c(
      " ALEJANDRA AZYADtETH ONTIVEROS CABALLERO",
      " ALEJANDRA CABALLERO",
      " ALEJANDRA ONTIVEROS CABALLERO",
      " ANTONIO MARQUEZ JUAREZ",
      " ARACELI NAVARRETE GONZALEZ",
      " ARELLANO VALDEZ LUIS EDUARDO",
      " AUTO EXPRESS DAMAJA",
      " AUTOTRANSPORTES PRIMAVERA",
      " BODEGUITA",
      " BUTRON GARCIA RAFAEL",
      " CALDERON HERNANDEZ JAIME",
      " CISNEROS QUEZADA JOSE HILARIO",
      " CYCA",
      " DANIEL QUIROZ PINEDA",
      " DRIVER",
      " DYLKA DISTRIB. Y LOGISTI K",
      " ERIK VELAZQUEZ MENA",
      " EVOLUTION",
      " FAVELA VELAZQUEZ MARIO",
      " FRANCISCO RICARDO MARTINEZ HERNANDEZ"
    )
  ),
  row.names = c(NA,
                20L),
  class = "data.frame")
carrier_data2 <- structure(list(
  Id = c(
    7709L,
    7714L,
    7717L,
    7708L,
    7723L,
    7707L,
    7753L,
    7754L,
    7743L,
    8721L,
    7727L,
    8667L,
    7724L,
    8496L,
    8443L,
    7702L,
    7751L,
    7742L,
    7711L,
    7744L,
    8735L,
    7755L,
    7349L,
    7757L,
    7759L,
    10249L,
    10250L,
    9932L,
    9341L,
    10432L,
    10292L,
    10424L,
    9518L,
    12120L,
    10337L,
    3568L,
    7148L,
    3196L,
    9403L,
    13150L,
    11925L
  ),
  Name = c(
    "10113183 - TRANSVETA",
    "10114582 - KARGOFER",
    "10118580 - BAEZ",
    "10118580 – BAEZ",
    "10119715 - TIUSA",
    "10119715 -TIUSA",
    "10123682 - HUGO ACUÑA SOSA",
    "10123781 - EDUARDO ACUÑA",
    "10125173 - TRANAN",
    "10130382 - GENOVEVA",
    "10133499 - TRABAL",
    "20-20",
    "24-7 TRANSPORTES INTERNACIONALES",
    "3 GENERACIONES KRONOS",
    "3 PL",
    "3 RS",
    "3 T CARRIER",
    "3CG GROUP",
    "3G-S",
    "3PL CARGO",
    "3R DE MEXICO",
    "3T",
    "4 ANTHAGO",
    "4 CAMINOS",
    "4 KARDO",
    "495TRANSPORTES TRAZIL",
    "4-A TRANSPORTES",
    "4GA",
    "501 RT",
    "501 TR",
    "5pl",
    "7 SEVEN",
    "7Jr6SRQW#",
    "99 MINUTOS",
    "A",
    "A & E EXPRESS",
    "A A TRUCKING",
    "A RAPIDOS LOZANO",
    "A TEMPORE",
    "A&R",
    "A1A TRUCKING"
  )
),
row.names = 166:206,
class = "data.frame")

0 Answers0