0

I have a dataframe, which has the following information:

installed_players    active_software
vlc.exe;wmplayer.exe    smss.exe;csrss.exe;winlogon.exe;vlc.exe
wmplayer.exe    smss.exe;csrss.exe;winlogon.exe;wmplayer.exe
wmplayer.exe    smss.exe;csrss.exe;winlogon.exe

What I need is to find which installed players are currently active (i.e. which programs from column A also appear in column B) and write them to column C.

So the output should be the following

installed_players    active_software    active_players
vlc.exe;wmplayer.exe    smss.exe;csrss.exe;winlogon.exe;vlc.exe    vlc.exe
wmplayer.exe    smss.exe;csrss.exe;winlogon.exe;wmplayer.exe    wmplayer.exe
wmplayer.exe    smss.exe;csrss.exe;winlogon.exe    none

I guess I need something like the intersect function, but I can't figure out how to apply it row by row in a data frame where columns A and B are strings.


Data:

# Sample data: a two-row data frame with row names 3 and 4.
# `players_installed` holds space-separated names; `processes_run` holds
# semicolon-separated names (the second value ends with a trailing ";").
df1 <- structure(
  list(
    players_installed = c("wmplayer.exe", "fsquirt.exe XMP.exe"),
    processes_run = c(
      "smss.exe;csrss.exe;wininit.exe;reg.exe",
      "smss.exe;csrss.exe;wininit.exe;services.exe;lsass.exe;svchost.exe;fontdrvhost.exe;WUDFHost.exe;igfxCUIService.exe;ibmpmsvc.exe;Lenovo.Modern.ImController.exe;ibtsiva.exe;lvvsst.exe;micmute.exe;CamMute.exe;AppleMobileDeviceService.exe;armsvc.exe;"
    )
  ),
  row.names = 3:4,
  class = "data.frame"
)
M--
  • 25,431
  • 8
  • 61
  • 93

2 Answers

0

Here is the solution

# Sample data: a two-row data frame with row names 3 and 4.
# `players_installed` holds space-separated names; `processes_run` holds
# semicolon-separated names (the second value ends with a trailing ";").
df1 <- structure(
  list(
    players_installed = c("wmplayer.exe", "fsquirt.exe XMP.exe"),
    processes_run = c(
      "smss.exe;csrss.exe;wininit.exe;reg.exe;wmplayer.exe",
      "smss.exe;csrss.exe;wininit.exe;services.exe;lsass.exe;svchost.exe;fontdrvhost.exe;WUDFHost.exe;igfxCUIService.exe;ibmpmsvc.exe;Lenovo.Modern.ImController.exe;ibtsiva.exe;lvvsst.exe;micmute.exe;CamMute.exe;AppleMobileDeviceService.exe;armsvc.exe;"
    )
  ),
  row.names = 3:4,
  class = "data.frame"
)

# Tokenise both columns on runs of ";" or " ". The columns themselves are
# left untouched, so `processes_run` keeps its original ";"-separated text.
installed <- strsplit(df1$players_installed, "[; ]+")
running <- strsplit(df1$processes_run, "[; ]+")

# Row-wise intersection: the installed players that are also running.
shared <- Map(intersect, installed, running)

# vapply() always yields a character vector (character(0) for a zero-row
# frame), unlike sapply(). Rows without a match get the empty string "".
df1$common <- vapply(shared, paste0, character(1), collapse = " ")

This is the second time I have posted a question and answered it myself. Thanks for the help, zx8754.

0

Another solution, using the stringr and qdapTools packages:

library(stringr)
library(qdapTools)

# For each row of `df`, tabulate the words found in column 1 and column 2.
# lapply() is used instead of sapply() so the result is always a list of
# tables, even when every row yields the same number of distinct words
# (sapply() would then simplify the tables into a single matrix).
X <- lapply(seq_len(nrow(df)), function(i) {
  # str_extract_all() with boundary("word") is the function exported by
  # stringr for word extraction; stri_extract_all_words() lives in stringi,
  # which library(stringr) does not attach.
  cells <- c(as.character(df[i, 1]), as.character(df[i, 2]))
  mtabulate(str_extract_all(cells, boundary("word")))
})

# A word is common when it is present in every row of its table. Testing
# presence (count > 0) rather than summing counts stops a word repeated in
# one column only from being reported as common.
# NOTE(review): assumes mtabulate() returns one row per input vector - confirm.
Y <- lapply(X, function(tab) colSums(tab > 0) == nrow(tab))

# Names of the common words for each row (character(0) when there are none).
Z <- lapply(Y, function(hit) names(hit)[hit])

# Collapse to exactly one string per row so the column length always matches
# nrow(df), even when a row has several common players; NA when none.
df$common <- vapply(Z, function(words) {
  if (length(words) > 0) paste(words, collapse = ";") else NA_character_
}, character(1))

df
##      installed_players                              active_software       common
## 1 vlc.exe;wmplayer.exe      smss.exe;csrss.exe;winlogon.exe;vlc.exe      vlc.exe
## 2         wmplayer.exe smss.exe;csrss.exe;winlogon.exe;wmplayer.exe wmplayer.exe
## 3         wmplayer.exe              smss.exe;csrss.exe;winlogon.exe         <NA>

Data:

 # Sample data: three rows with automatic row names. Both columns hold
 # ";"-separated executable names.
 df <- structure(
   list(
     installed_players = c(
       "vlc.exe;wmplayer.exe",
       "wmplayer.exe",
       "wmplayer.exe"
     ),
     active_software = c(
       "smss.exe;csrss.exe;winlogon.exe;vlc.exe",
       "smss.exe;csrss.exe;winlogon.exe;wmplayer.exe",
       "smss.exe;csrss.exe;winlogon.exe"
     )
   ),
   row.names = c(NA, 3L),
   class = "data.frame"
 )
M--
  • 25,431
  • 8
  • 61
  • 93