1

I was able to change the encoding of a data frame using the code provided in "R - Changing encoding of column in dataframe?". However, I would like to use map instead of a for loop, and I wasn't able to do so and can't figure out why. I intend to use it in a piped workflow, where using for loops would be very cumbersome.

# Example of the data:
# Six records with four character columns. FANTASIA and RAZ_SOCI contain
# accented text written as raw single-byte escapes (e.g. "\xca"). The strings
# carry no declared encoding, so Encoding() reports them as "unknown" until
# they are explicitly marked as "latin1".
dat <- structure(list(CNES = c("0137162", "0137170", "0137189", "0137197", 
                               "0137200", "0137219"), CPF_CNPJ = c("87768735000148", "03005201000170", 
                                                                   "00000000000000", "00000000000000", "00000000000000", "87775334000115"
                               ), FANTASIA = c("HOSPITAL DE CARIDADE E BENEFIC\xcaNCIA", "COELHO & JUNG INSTITUTO DE OFTALMOLOGIA LTDA", 
                                               "UNIDADE SANIT\xc1RIA 03 - PR\xc9DIO HOSPITAL DA LIGA", "UNIDADE SANIT\xc1RIA 11 - BOSQUE", 
                                               "PROGRAMA DE SA\xdaDE DA FAM\xcdLIA 01 - BAIRRO PROMORAR", "SIND TRAB IND ALIMENTA\xc7\xc3O"
                               ), RAZ_SOCI = c("CNPJ 87.768.735/0001-48-HOSPITAL DE CARIDADE E BENEFIC\xcaNCIA", 
                                               "CNPJ 03.005.201/0001-70-COELHO & JUNG INSTITUTO DE OFTALMOLOGIA LTDA", 
                                               "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL", 
                                               "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL", 
                                               "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL", 
                                               "CNPJ 87.775.334/0001-15-SINDICATO DOS TRABALHADORES NA INDUSTRIA DA ALIMENTA\xc7\xc3O"
                               )), row.names = c("1", "2", "3", "4", "5", "6"), class = "data.frame")

# Using map
# NOTE(review): this attempt cannot yield re-encoded columns. An assignment
# evaluates to the assigned value, so the modified `.x` is never returned;
# and because `~` binds more tightly than `<-` (see ?Syntax), the unbraced
# assignment is not parsed as the body of the lambda. Wrap the body in
# `{ }` and finish it with `.x`.
dat %>% 
  map(~ Encoding(.x) <-  "latin1")

Jason Aller
  • 3,541
  • 28
  • 38
  • 38
Marcio Rodrigues
  • 319
  • 1
  • 11

2 Answers

3

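We need to return the data as well: an assignment evaluates to the assigned value ("latin1"), not to the modified vector, so the function has to end by returning `.x` itself.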

library(dplyr)
library(purrr)
# Mark each column as Latin-1 and column-bind the results into `dat1`.
dat1 <- dat %>%
  map_dfc(function(col) {
    # Only the declared encoding changes; the underlying bytes are untouched.
    Encoding(col) <- "latin1"
    # Hand the vector back, since the assignment alone would not return it.
    col
  })

This can also be done in a single line by calling the replacement function `Encoding<-` directly:

# Same re-encoding in one expression: calling the replacement function
# directly returns the re-marked vector, so no explicit return is needed.
dat1 <- dat %>%
  map_dfc(function(col) `Encoding<-`(col, "latin1"))

Or without a lambda call

# Lambda-free form: map_dfc forwards "latin1" to `Encoding<-` as its `value`
# argument for every column.
dat %>%
  map_dfc(`Encoding<-`, value = "latin1")

Checking the structure of the original and the updated data:

# Before: no encoding is declared, so the accented bytes show up as \x escapes.
str(dat)
#'data.frame':  6 obs. of  4 variables:
# $ CNES    : chr  "0137162" "0137170" "0137189" "0137197" ...
# $ CPF_CNPJ: chr  "87768735000148" "03005201000170" "00000000000000" "00000000000000" ...
# $ FANTASIA: chr  "HOSPITAL DE CARIDADE E BENEFIC\xcaNCIA" "COELHO & JUNG INSTITUTO DE OFTALMOLOGIA LTDA" "UNIDADE SANIT\xc1RIA 03 - PR\xc9DIO HOSPITAL DA LIGA" "UNIDADE SANIT\xc1RIA 11 - BOSQUE" ...
# $ RAZ_SOCI: chr  "CNPJ 87.768.735/0001-48-HOSPITAL DE CARIDADE E BENEFIC\xcaNCIA" "CNPJ 03.005.201/0001-70-COELHO & JUNG INSTITUTO DE OFTALMOLOGIA LTDA" "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL" "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL" ...


# After: with the strings marked as latin1, the accented characters print
# correctly.
str(dat1)
#tibble [6 × 4] (S3: tbl_df/tbl/data.frame)
# $ CNES    : chr [1:6] "0137162" "0137170" "0137189" "0137197" ...
# $ CPF_CNPJ: chr [1:6] "87768735000148" "03005201000170" "00000000000000" "00000000000000" ...
# $ FANTASIA: chr [1:6] "HOSPITAL DE CARIDADE E BENEFICÊNCIA" "COELHO & JUNG INSTITUTO DE OFTALMOLOGIA LTDA" "UNIDADE SANITÁRIA 03 - PRÉDIO HOSPITAL DA LIGA" "UNIDADE SANITÁRIA 11 - BOSQUE" ...
# $ RAZ_SOCI: chr [1:6] "CNPJ 87.768.735/0001-48-HOSPITAL DE CARIDADE E BENEFICÊNCIA" "CNPJ 03.005.201/0001-70-COELHO & JUNG INSTITUTO DE OFTALMOLOGIA LTDA" "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL" "CNPJ 00.000.000/0000-00-PREFEITURA MUNICIPAL DE CACHOEIRA DO SUL" ...

Or check by column

# Declared encoding of every element before the change.
sapply(dat, Encoding)
#     CNES      CPF_CNPJ  FANTASIA  RAZ_SOCI 
#[1,] "unknown" "unknown" "unknown" "unknown"
#[2,] "unknown" "unknown" "unknown" "unknown"
#[3,] "unknown" "unknown" "unknown" "unknown"
#[4,] "unknown" "unknown" "unknown" "unknown"
#[5,] "unknown" "unknown" "unknown" "unknown"
#[6,] "unknown" "unknown" "unknown" "unknown"


# After the change: only strings that contain non-ASCII bytes report "latin1";
# pure-ASCII strings stay "unknown".
sapply(dat1, Encoding)
#     CNES      CPF_CNPJ  FANTASIA  RAZ_SOCI 
#[1,] "unknown" "unknown" "latin1"  "latin1" 
#[2,] "unknown" "unknown" "unknown" "unknown"
#[3,] "unknown" "unknown" "latin1"  "unknown"
#[4,] "unknown" "unknown" "latin1"  "unknown"
#[5,] "unknown" "unknown" "latin1"  "unknown"
#[6,] "unknown" "unknown" "latin1"  "latin1" 

Or we can use across as well

# Re-mark only the character columns as Latin-1. `Encoding<-` accepts
# character vectors only, so selecting with where(is.character) keeps this
# working when the data frame also holds numeric, logical, or date columns.
# For an all-character input the result is the same as with everything().
dat1 <- dat %>%
  mutate(across(where(is.character), ~ `Encoding<-`(.x, "latin1")))
akrun
  • 874,273
  • 37
  • 540
  • 662
3

Here is a data.table option which might work and help

# Turn `dat` into a data.table with setDT(), then re-mark every column in .SD
# as Latin-1; the result is returned rather than assigned.
setDT(dat)[, lapply(.SD, function(col) `Encoding<-`(col, "latin1"))]
ThomasIsCoding
  • 96,636
  • 9
  • 24
  • 81