1

To create an attribute column, I can do it like this:

# Label every row of `dat` according to the first keyword found in
# `dat$stuff`. The patterns are tried top to bottom, so an earlier keyword
# wins when several match; rows matching none of them get "no".
dat$clas <-
  ifelse(grepl("den",   dat$stuff), "bak",
  ifelse(grepl("kro",   dat$stuff), "bak1",
  ifelse(grepl("ris",   dat$stuff), "bak3",
  ifelse(grepl("muka",  dat$stuff), "rty",
  ifelse(grepl("chlo",  dat$stuff), "cos",
  ifelse(grepl("prokl", dat$stuff), "gig",
         "no"))))))

But what if I have many attributes? Suppose there are 200 of them. Writing such a nested ifelse statement takes a long time and produces very long code. Can I take the keyword-to-attribute mapping from a data frame instead?

templatedata<-prod  clas
den               bak
kro              bak1
ris               bak3
muka            rty
chlo               cos
prokl                gig

)

# Lookup table mapping a keyword (`prod`) to the attribute (`class`) that
# should be assigned to every phrase containing that keyword.
#
# The keywords are stored without the trailing tab characters ("chlo\t",
# "kro\t", "prokl\t", "ris\t") that came along when the table was pasted:
# with the tab included, a keyword used as a search pattern would not match
# phrases such as "kro serdgt", where the keyword is followed by a space.
#
# Both columns stay factors, with the same level order as before.
templatedata <- data.frame(
  prod = factor(
    c("den", "kro", "ris", "muka", "chlo", "prokl"),
    levels = c("chlo", "den", "kro", "muka", "prokl", "ris")
  ),
  class = factor(
    c("bak", "bak1", "bak3", "rty", "cos", "gig"),
    levels = c("bak", "bak1", "bak3", "cos", "gig", "rty")
  )
)

so

workingdataset<-(
prod
den sg
kro serdgt
ris szdg
muka aszgt
chlo sdgt
prokl zfdsgr
den zdasfh)


# Working data: one factor column `prod` holding the phrases to classify.
# Values are listed in row order; `levels` fixes the factor's level order.
workingdataset <- data.frame(
  prod = factor(
    c(
      "den sg", "kro serdgt", "ris szdg", "muka aszgt",
      "chlo sdgt", "prokl zfdsgr", "den zdasfh)"
    ),
    levels = c(
      "chlo sdgt", "den sg", "den zdasfh)", "kro serdgt",
      "muka aszgt", "prokl zfdsgr", "ris szdg"
    )
  )
)

I want to use templatedata to assign the attribute in workingdataset.

As output, workingdataset should look like this:

prod           clas
den   sg        bak
kro  serdgt      bak1
ris szdg        bak3
muka aszgt      rty
chlo sdgt       cos
prokl   zfdsgr  gig
den zdasfh      bak

How can I do this?

psysky
  • 3,037
  • 5
  • 28
  • 64
  • https://stackoverflow.com/questions/1299871/how-to-join-merge-data-frames-inner-outer-left-right it is not for my task (i work with phrases) – psysky Oct 15 '18 at 19:32
  • @MrFlick, I edited the post and added dput output; that is all I can do. Now workingdataset contains phrases with keywords, and I must create the attributes from templatedata by these keywords – psysky Oct 15 '18 at 19:36
  • @MrFlick, i provided dput – psysky Oct 15 '18 at 19:40

1 Answer

3

You can achieve this using the wonderful fuzzyjoin package created by David Robinson. It allows you to join two tables using fuzzy logic, including string distances or regular expressions. Here, we'll use regular expressions.

library(fuzzyjoin)
library(magrittr)

# Phrases to classify: a single factor column `prod`.
# Values are given in row order; `levels` fixes the factor's level order.
workingdataset <- data.frame(
  prod = factor(
    c(
      "den sg", "kro serdgt", "ris szdg", "muka aszgt",
      "chlo sdgt", "prokl zfdsgr", "den zdasfh)"
    ),
    levels = c(
      "chlo sdgt", "den sg", "den zdasfh)", "kro serdgt",
      "muka aszgt", "prokl zfdsgr", "ris szdg"
    )
  )
)

# Lookup table: `prod` is the keyword to look for, `class` the attribute
# assigned to phrases containing it. Both columns are factors; values are
# given in row order and `levels` fixes each factor's level order.
templatedata <- data.frame(
  prod = factor(
    c("den", "kro", "ris", "muka", "chlo", "prokl"),
    levels = c("chlo", "den", "kro", "muka", "prokl", "ris")
  ),
  class = factor(
    c("bak", "bak1", "bak3", "rty", "cos", "gig"),
    levels = c("bak", "bak1", "bak3", "cos", "gig", "rty")
  )
)

# Join the phrases to the lookup table on `prod`, treating the keywords in
# templatedata$prod as regular expressions matched against
# workingdataset$prod. The result is printed, with both `prod` columns
# (suffixed .x / .y) and the matched `class`.
regex_inner_join(workingdataset, templatedata, by = "prod")

        prod.x prod.y class
1       den sg    den   bak
2   kro serdgt    kro  bak1
3     ris szdg    ris  bak3
4   muka aszgt   muka   rty
5    chlo sdgt   chlo   cos
6 prokl zfdsgr  prokl   gig
7  den zdasfh)    den   bak
tblznbits
  • 6,602
  • 6
  • 36
  • 66