I wrote a piece of code (appended below) that works fine when I run it serially, but when I run it in parallel using the `foreach` and `doParallel` packages in R, I get an error that reads: `task 1 failed - "missing value where TRUE/FALSE needed"`.
Everything inside the `foreach` loop works on its own, and on a smaller batch I can run it serially without any problems.
# Collapse rows that share a CATEGORY into one wide row per subset.
#
# For every data set named Subset1 ... Subset<nSubsets>, each row is compared
# with up to five rows above it. When a row's CATEGORY matches an earlier row,
# its first length(ListOfColumns) values are copied into the next free "block"
# of columns on the earlier row (blocks are named <column>_2, <column>_3, ...
# and are created on demand), and the now-redundant row is removed.
#
# Reads:  tempdata, nSubsets, Subset1 ... Subset<nSubsets>
# Writes: ListOfColumns, SubsetResults, Subset1 ... Subset<nSubsets>

# Column names that define the width of one block.
ListOfColumns <- colnames(tempdata)

# seq_len() (rather than 1:nSubsets) keeps the loop empty when nSubsets is 0.
# The loop body ends with the processed data so that foreach hands it back to
# the calling process; objects created inside a %dopar% body live on the
# worker and are not visible to the caller.
SubsetResults <- foreach(i = seq_len(nSubsets),
                         .export = ls(globalenv())) %dopar% {
  DoubleTempData <- get(paste0("Subset", i))
  DoubleTempData <- subset(DoubleTempData, select = -c(subset))

  # Width of one block of columns; invariant for the whole task.
  BlockWidth <- length(ListOfColumns)

  RowCounter <- 2
  # Index of the block currently being probed (block 1 is the original data).
  ColumnFigurer <- 2
  # Sentinel that is not expected to equal any real CATEGORY value.
  LastCATEGORYIndicator <- "THERE IS NO CATEGORY, ONLY ZUUL"

  while (RowCounter <= nrow(DoubleTempData)) {
    print(paste("Checking row ", RowCounter))
    # Only look back at most five rows.
    RowChecker <- max(1, RowCounter - 5)
    while (RowChecker < RowCounter) {
      print(paste("Checking row",
                  RowCounter,
                  "against row",
                  RowChecker))
      if (DoubleTempData$CATEGORY[RowChecker] ==
          DoubleTempData$CATEGORY[RowCounter]) {
        print("The rows match!")
        while (ColumnFigurer > 0) {
          # A different CATEGORY than the last merged one restarts the probe
          # at block 2.
          # NOTE(review): this reset runs on every pass, so while the
          # CATEGORY differs from LastCATEGORYIndicator and block 2 of the
          # target row is already filled, the counter cannot advance past 2.
          # Confirm the input ordering rules this case out.
          if (DoubleTempData$CATEGORY[RowCounter] != LastCATEGORYIndicator) {
            ColumnFigurer <- 2
          }
          print(paste ("Checking Iteration", ColumnFigurer))
          if (ColumnFigurer * BlockWidth <= length(colnames(DoubleTempData))) {
            print(paste("Iteration", ColumnFigurer, " exists"))
            # First cell of the probed block decides whether it is free.
            CellChecker <- ((ColumnFigurer - 1) * BlockWidth + 1)
            if (is.na(DoubleTempData[[RowChecker, CellChecker]])) {
              print(paste("Current value is NA. Writing in new value."))
              ColumnCounter <- 1
              while (ColumnCounter <= BlockWidth) {
                DoubleTempData[[RowChecker,
                                (ColumnFigurer - 1) * BlockWidth +
                                  ColumnCounter]] <-
                  DoubleTempData[[RowCounter, ColumnCounter]]
                ColumnCounter <- ColumnCounter + 1
              }
              # Remove the merged ROW. The trailing comma matters: without it,
              # `[` on a plain data.frame drops COLUMN RowCounter instead.
              # NOTE(review): the comma-less form only removes a row when
              # data.table's `[` method is dispatched, which presumably is
              # not the case on workers where that package is not loaded.
              DoubleTempData <- DoubleTempData[-RowCounter, ]
              LastCATEGORYIndicator <- DoubleTempData$CATEGORY[RowChecker]
              # Step back so the row that moved up into this position is
              # examined next (never below row 2).
              RowCounter <- max(2, RowCounter - 1)
              ColumnFigurer <- ColumnFigurer + 1
              break
            } else {
              print(paste(
                "Current value is not NA, increasing iteration count."
              ))
              ColumnFigurer <- ColumnFigurer + 1
            }
          }
          if (ColumnFigurer * BlockWidth > length(colnames(DoubleTempData))) {
            print(paste(
              "Iteration ",
              ColumnFigurer,
              "does not exist, adding iteration."
            ))
            # Append an all-NA block named <column>_<ColumnFigurer>.
            ColumnAdder <- 1
            while (ColumnAdder <= BlockWidth) {
              NewColumnName <-
                paste0(ListOfColumns[ColumnAdder], "_", ColumnFigurer)
              DoubleTempData[, NewColumnName] <- NA
              ColumnAdder <- ColumnAdder + 1
            }
          }
        }
      }
      RowChecker <- RowChecker + 1
    }
    RowCounter <- RowCounter + 1
  }

  # Value of the task: the widened data for subset i.
  DoubleTempData
}

# Write the results back to Subset1 ... Subset<nSubsets> in the calling
# process; foreach returns results in iteration order by default.
for (i in seq_along(SubsetResults)) {
  assign(paste0("Subset", i), SubsetResults[[i]])
}
For example, here is a sample of a randomly generated `Subset1` that I ran, with about 70 observations and 7 columns (one of which gets dropped by the program, as intended):
[![enter image description here][1]][1]

[1]: https://i.stack.imgur.com/Jlytj.png

It then outputs a dataset with 9 observations and 60 columns: