I suggest this solution: it does not use any other library than base
.
- Define a
permutations
function, to compute all possibile combinations of elements of a vector (or list) vec
taken sublen
elements at a time (likewise the combn
function)
permutations <- function(vec, sublen, prev_vec=NULL){
out_list <- list()
if(sublen==1){
for(v in vec){
out_list <- append(out_list,list(append(prev_vec,list(v))))
}
} else {
for (i in 1:(length(vec)-sublen+1)){
v <- vec[1]
prev_vec0 <- c(prev_vec,vec[1])
vec <- vec[2:length(vec)]
perm_list <- permutations(
vec=vec,
sublen=sublen-1,
prev_vec=prev_vec0
)
out_list <- append(out_list,perm_list)
}
}
return(out_list)
}
- Define a
find_matrix
function that unlists matrices from deeply nested lists (source)
find_matrix <- function(x) {
if (is.matrix(x))
return(list(x))
if (!is.list(x))
return(NULL)
unlist(lapply(x, find_matrix), FALSE)
}
- Define a
compatible_rows
function, that extract from a dataframe a subset of rows which can be used to create the other output vectors, given an output vector
compatible_rows <- function(df,row_value){
row_ids <- c()
if(is.null(nrow(df))){
return(NULL)
} else {
for (row_id in 1:nrow(df)){
row_ids <- c(row_ids,!any(row_value %in% df[row_id,]))
}
return(df[which(row_ids),])
}
}
- Create a
new_groups_list
function, that computes all possible output matrices
new_groups_list <- function(df, prev_df=NULL, lvl=-1, verbose=F){
lvl <- lvl+1
results_list <- list()
if(is.null(nrow(df))){
if(verbose==T) cat(paste0("--BRANCH END (BEGIN lvl ",lvl,")--\n"))
prev_df0 <- rbind(prev_df,df)
rownames(prev_df0) <- NULL
if(verbose==T) cat("returned\n")
if(verbose==T) print(prev_df0)
if(verbose==T) cat("\n")
return(prev_df0)
}
if(nrow(df)==0){
if(verbose==T) cat(paste0("--BRANCH END (BEGIN lvl ",lvl,")--\n"))
prev_df0 <- rbind(prev_df,df)
rownames(prev_df0) <- NULL
if(verbose==T) cat("returned\n")
if(verbose==T) print(prev_df0)
if(verbose==T) cat("\n")
return(prev_df0)
}
for(row_id in 1:nrow(df)){
if(verbose==T) cat(paste("-- lvl",lvl,"cycle",row_id,"--\n"))
if(verbose==T) cat("initial results list\n")
if(verbose==T) print(results_list)
if(verbose==T) cat("\n")
if(verbose==T) cat("df in\n")
if(verbose==T) assign("last_df",df,envir = .GlobalEnv)
if(verbose==T) print(df)
if(verbose==T) cat("\n")
if(is.null(nrow(df))){
prev_df0 <- rbind(prev_df,df)
rownames(prev_df0) <- NULL
if(verbose==T) cat(paste0("--BRANCH END (MID lvl ",lvl,")--\n"))
if(verbose==T) cat("returned\n")
results_list <- append(results_list,list(prev_df0))
if(verbose==T) print(results_list)
if(verbose==T) cat("\n")
return(results_list)
}
considered_row <- df[1,]
if(verbose==T) assign("last_considered_row",considered_row,envir = .GlobalEnv)
if(verbose==T) cat("considered rows\n")
if(verbose==T) print(considered_row)
if(verbose==T) cat("\n")
df <- df[2:nrow(df),]
if(verbose==T) assign("last_df",df,envir = .GlobalEnv)
if(verbose==T) cat("df without considered rows\n")
if(verbose==T) print(df)
if(verbose==T) cat("\n")
prev_df0 <- rbind(prev_df,considered_row)
rownames(prev_df0) <- NULL
if(verbose==T) assign("last_prev0",prev_df0,envir = .GlobalEnv)
if(verbose==T) cat("collected considered rows\n")
if(verbose==T) print(prev_df0)
if(verbose==T) cat("\n")
comp_df <- compatible_rows(df,considered_row)
if(verbose==T) assign("last_comp_df",comp_df,envir = .GlobalEnv)
if(verbose==T) cat("compatible rows in df\n")
if(verbose==T) print(comp_df)
if(verbose==T) cat("\n")
if(verbose==T) cat(paste(">>> GOING TO LVL",lvl+1,"\n\n"))
new_rows <- new_groups_list(
comp_df,
prev_df=prev_df0,
lvl=lvl,
verbose=verbose
)
if(verbose==T) cat(paste0("--ROOT (lvl ",lvl,")--\n"))
if(verbose==T) cat("result received from branch\n")
if(verbose==T) print(new_rows)
if(verbose==T) cat("\n")
results_list <- append(results_list,list(new_rows))
if(verbose==T) cat("results list\n")
if(verbose==T) print(results_list)
if(verbose==T) cat("\n")
}
return(results_list)
}
- Create
create_new_groups
, which wraps-up all the other functions, and outputs the whole list of possible solutions
create_new_groups <- function(original_groups, max_output = NULL){
min_len_original_groups = min(lengths(original_groups))
num_original_groups = length(original_groups)
max_len_subgroup <- floor(min_len_original_groups/2)
if(min_len_original_groups<2){
return("Not possible to populate new groups: at least one original group has less than 2 elements")
}
NewGroups_subLen_len_num <- list()
for (len_subgroup in 1:max_len_subgroup){
new_group_params <- c(
len_subgroup,
len_subgroup*num_original_groups,
floor(min_len_original_groups/len_subgroup)
)
NewGroups_subLen_len_num[[len_subgroup]] <- new_group_params
}
out_list <- list()
ind <- 1
for (e in 1:length(NewGroups_subLen_len_num)){
NewGroup_subLen_len_num <- NewGroups_subLen_len_num[[e]]
elem_list <- list()
ind <- 1
# print(ind)
for (o in 1:length(original_groups)){
original_group <- original_groups[[o]]
elem_list[[paste("group",ind)]] <- permutations(original_group,NewGroup_subLen_len_num[1])
ind <- ind+1
}
out_list[[paste(c("subLen","len","numGroups"), NewGroup_subLen_len_num, collapse = " ")]] <- elem_list
}
results_list <- list()
config_test <<- NewGroups_subLen_len_num
for (config_id in 1:length(NewGroups_subLen_len_num)){
config <- NewGroups_subLen_len_num[[config_id]]
perm_grid <- expand.grid(out_list[[config_id]])
perm_grid <- split(perm_grid,1:nrow(perm_grid))
perm_grid <- lapply(perm_grid,unlist)
perm_grid <- lapply(perm_grid,as.character)
perm_grid <- do.call(rbind, perm_grid)
new_groups <- new_groups_list(perm_grid,verbose = F)
new_groups <- find_matrix(new_groups)
# config_test <<- config
new_groups <- new_groups[which(sapply(new_groups, nrow) >= config[3])]
# out_test <<- new_groups
results_list[[config_id]] <- new_groups
}
return(results_list)
}
Given a simple input, like
original_groups <- list(
group_1 = as.character(1:2),
group_2 = as.character(3:4),
group_3 = as.character(5:7)
)
The output of create_new_groups(original_groups)
is
> create_new_groups_modified(original_groups)
[[1]]
[[1]][[1]]
[,1] [,2] [,3]
[1,] "1" "3" "5"
[2,] "2" "4" "6"
[[1]][[2]]
[,1] [,2] [,3]
[1,] "1" "3" "5"
[2,] "2" "4" "7"
[[1]][[3]]
[,1] [,2] [,3]
[1,] "2" "3" "5"
[2,] "1" "4" "6"
[[1]][[4]]
[,1] [,2] [,3]
[1,] "2" "3" "5"
[2,] "1" "4" "7"
[[1]][[5]]
[,1] [,2] [,3]
[1,] "1" "4" "5"
[2,] "2" "3" "6"
[[1]][[6]]
[,1] [,2] [,3]
[1,] "1" "4" "5"
[2,] "2" "3" "7"
[[1]][[7]]
[,1] [,2] [,3]
[1,] "2" "4" "5"
[2,] "1" "3" "6"
[[1]][[8]]
[,1] [,2] [,3]
[1,] "2" "4" "5"
[2,] "1" "3" "7"
[[1]][[9]]
[,1] [,2] [,3]
[1,] "1" "3" "6"
[2,] "2" "4" "7"
[[1]][[10]]
[,1] [,2] [,3]
[1,] "2" "3" "6"
[2,] "1" "4" "7"
[[1]][[11]]
[,1] [,2] [,3]
[1,] "1" "4" "6"
[2,] "2" "3" "7"
[[1]][[12]]
[,1] [,2] [,3]
[1,] "2" "4" "6"
[2,] "1" "3" "7"
Moreover, the create_new_groups
function also creates a global variable config_test
where all possible configurations, for a given list of groups (i.e., original_groups
), are stored. For example, for the previous problem, config_test
is equal to
> config_test
[[1]]
[1] 1 3 2
So, for this problem only one output configuration is possible, having this structure:
- the number of elements taken from each original group and used in each output group is 1
- the length of the output groups is 6
- the number of output groups (in each possible combination) is 2
Given a slightly more complex example
original_groups <- list(
group_1 = as.character(1:4),
group_2 = as.character(5:8),
group_3 = as.character(9:13)
)
config_test
would be equal to
> config_test
[[1]]
[1] 1 3 4
[[2]]
[1] 2 6 2
I made some tests, this method should work for any number of groups, of any length, and the output should always be composed of not-duplicated matrices.
I'm sorry if the explanation is short, if I have time in the following days I'll try to add some notes.
EDIT
A simple way to output only the configurations characterized by a specific number of elements from the original groups is to change the create_new_groups
as follows
create_new_groups_modified <- function(original_groups, max_output = NULL, elements_from_original = NULL){
min_len_original_groups = min(lengths(original_groups))
num_original_groups = length(original_groups)
max_len_subgroup <- floor(min_len_original_groups/2)
if(min_len_original_groups<2){
stop("Not possible to populate new groups: at least one original group has less than 2 elements")
}
NewGroups_subLen_len_num <- list()
for (len_subgroup in 1:max_len_subgroup){
new_group_params <- c(
len_subgroup,
len_subgroup*num_original_groups,
floor(min_len_original_groups/len_subgroup)
)
NewGroups_subLen_len_num[[len_subgroup]] <- new_group_params
}
out_list <- list()
ind <- 1
for (e in 1:length(NewGroups_subLen_len_num)){
NewGroup_subLen_len_num <- NewGroups_subLen_len_num[[e]]
elem_list <- list()
ind <- 1
# print(ind)
for (o in 1:length(original_groups)){
original_group <- original_groups[[o]]
elem_list[[paste("group",ind)]] <- permutations(original_group,NewGroup_subLen_len_num[1])
ind <- ind+1
}
out_list[[paste(c("subLen","len","numGroups"), NewGroup_subLen_len_num, collapse = " ")]] <- elem_list
}
results_list <- list()
config_test <<- NewGroups_subLen_len_num
# if `elements_from_original` is not set, output all possible combinations
if(is.null(elements_from_original)){
for (config_id in 1:length(NewGroups_subLen_len_num)){
config <- NewGroups_subLen_len_num[[config_id]]
perm_grid <- expand.grid(out_list[[config_id]])
perm_grid <- split(perm_grid,1:nrow(perm_grid))
perm_grid <- lapply(perm_grid,unlist)
perm_grid <- lapply(perm_grid,as.character)
perm_grid <- do.call(rbind, perm_grid)
new_groups <- new_groups_list(perm_grid,verbose = F)
new_groups <- find_matrix(new_groups)
# config_test <<- config
new_groups <- new_groups[which(sapply(new_groups, nrow) >= config[3])]
# out_test <<- new_groups
results_list[[config_id]] <- new_groups
}
} else {
# if `elements_from_original` is set, check if this is a valid configuration, then output only the matrix having this configuration
config_id <- which(sapply(NewGroups_subLen_len_num,function(x) x[1]==elements_from_original))
if (length(config_id)!=0){
config <- NewGroups_subLen_len_num[[config_id]]
perm_grid <- expand.grid(out_list[[config_id]])
perm_grid <- split(perm_grid,1:nrow(perm_grid))
perm_grid <- lapply(perm_grid,unlist)
perm_grid <- lapply(perm_grid,as.character)
perm_grid <- do.call(rbind, perm_grid)
new_groups <- new_groups_list(perm_grid,verbose = F)
new_groups <- find_matrix(new_groups)
new_groups <- new_groups[which(sapply(new_groups, nrow) >= config[3])]
results_list[[1]] <- new_groups
} else {
stop("value of elements_from_original not available: check config_test to see available configurations")
}
}
return(results_list)
}
The elements_from_original
parameter of the function allows to set the number elements from the original groups to consider, and, if applicable, the output will include only matrices following that configuration.
EDIT 2
To output the matrices composed by a specific number of groups
- Write a new function
select_matrices_by_number_output_groups
, that outputs only the matrices with n_output_groups
rows
select_matrices_by_number_output_groups <- function(l,n_output_groups){
# Filter out matrices having less rows than `n_output_groups`
out_l <- l[which(
sapply(
l,
# function(x) check_matrix_by_number_output_groups(x,n_output_groups)
function(mtr){
if(nrow(mtr)<n_output_groups) return(F)
else return(T)
}
)
)]
# Cut-off rows from matrices having more rows than `n_output_groups`
out_l <- lapply(
out_l,
function(x) head(x,n_output_groups)
)
# Keep only unique elements (i.e., matrices)
out_l <- unique(out_l)
return(out_l)
}
- The update
create_new_groups
so that it includes the select_matrices_by_number_output_groups
function
create_new_groups_modified_2 <- function(original_groups, max_output = NULL, elements_from_original = NULL, n_output_groups = NULL){
min_len_original_groups = min(lengths(original_groups))
num_original_groups = length(original_groups)
max_len_subgroup <- floor(min_len_original_groups/2)
if(min_len_original_groups<2){
stop("Not possible to populate new groups: at least one original group has less than 2 elements")
}
NewGroups_subLen_len_num <- list()
for (len_subgroup in 1:max_len_subgroup){
new_group_params <- c(
len_subgroup,
len_subgroup*num_original_groups,
floor(min_len_original_groups/len_subgroup)
)
NewGroups_subLen_len_num[[len_subgroup]] <- new_group_params
}
out_list <- list()
ind <- 1
for (e in 1:length(NewGroups_subLen_len_num)){
NewGroup_subLen_len_num <- NewGroups_subLen_len_num[[e]]
elem_list <- list()
ind <- 1
# print(ind)
for (o in 1:length(original_groups)){
original_group <- original_groups[[o]]
elem_list[[paste("group",ind)]] <- permutations(original_group,NewGroup_subLen_len_num[1])
ind <- ind+1
}
out_list[[paste(c("subLen","len","numGroups"), NewGroup_subLen_len_num, collapse = " ")]] <- elem_list
}
results_list <- list()
config_test <<- NewGroups_subLen_len_num
# if `elements_from_original` is not set, output all possible combinations
if(is.null(elements_from_original)){
for (config_id in 1:length(NewGroups_subLen_len_num)){
config <- NewGroups_subLen_len_num[[config_id]]
perm_grid <- expand.grid(out_list[[config_id]])
perm_grid <- split(perm_grid,1:nrow(perm_grid))
perm_grid <- lapply(perm_grid,unlist)
perm_grid <- lapply(perm_grid,as.character)
perm_grid <- do.call(rbind, perm_grid)
new_groups <- new_groups_list(perm_grid,verbose = F)
new_groups <- find_matrix(new_groups)
new_groups <- new_groups[which(sapply(new_groups, nrow) >= config[3])]
results_list[[config_id]] <- new_groups
}
} else {
# if `elements_from_original` is set, check if this is a valid configuration, then output only the matrix having this configuration
config_id <- which(sapply(NewGroups_subLen_len_num,function(x) x[1]==elements_from_original))
if (length(config_id)!=0){
config <- NewGroups_subLen_len_num[[config_id]]
perm_grid <- expand.grid(out_list[[config_id]])
perm_grid <- split(perm_grid,1:nrow(perm_grid))
perm_grid <- lapply(perm_grid,unlist)
perm_grid <- lapply(perm_grid,as.character)
perm_grid <- do.call(rbind, perm_grid)
new_groups <- new_groups_list(perm_grid,verbose = F)
new_groups <- find_matrix(new_groups)
new_groups <- lapply(
new_groups,
function(x) {
dimnames(x) <- NULL
return(x)
}
)
if(is.null(n_output_groups)){
new_groups <- new_groups[which(sapply(new_groups, nrow) == config[3])]
} else if (n_output_groups > config[3]){
stop("value n_output_groups higher than max number of new groups for this configuration: check config_test to see available configurations")
} else {
new_groups <- select_matrices_by_number_output_groups(new_groups,n_output_groups)
}
# results_list[[1]] <- new_groups
results_list <- new_groups
} else {
stop("value of elements_from_original not available: check config_test to see available configurations")
}
}
return(results_list)
}