What is the value of `df$Unique_number[2]`? Is it an empty string? If so, you can first convert it to `NA`:
# Mark every empty-string entry of the column as missing (NA).
is.na(df$Unique_number) <- df$Unique_number == ""
and then use `na.locf` from the zoo package:
# Fill each NA with the last non-NA value before it.
# na.rm = FALSE keeps leading NAs (which have nothing to carry forward) in
# place; the default would drop them, making the result shorter than the
# column so that the assignment back into df fails.
df$Unique_number <- zoo::na.locf(df$Unique_number, na.rm = FALSE)
This will carry over the last non-NA observation to replace NAs.
Edit
To preserve the original NA values, split your data frame in two and operate only on the part that contains the values you want to replace (I am assuming empty strings):
# Rows whose Unique_number is already NA; set aside so they remain NA.
df0 <- df[is.na(df$Unique_number), ]
# All remaining rows. The mask must be inverted with logical `!`: arithmetic
# `-` would turn it into 0/-1 row indices instead of selecting non-NA rows.
df1 <- df[!is.na(df$Unique_number), ]
(alternatively, use `split(df, is.na(df$Unique_number))`), then call the code above on `df1`, and finally `rbind` them.
Edit 2
Here is another approach. I'm pretty sure it will be slower than the one above that uses zoo, but it lets you specify your own string:
MISSING_STRING <- "" # String you want replaced with last non-NA value
x0 <- c("1", "2", "", "3", "4", "", "", "5", "6", NA, "", "7", "8",
        "", "9", "10", "") # Example vector

# Replace each occurrence of `missing_string` in `x` with the most recent
# element that is neither NA nor `missing_string`.
#
# Elements that are NA stay NA and are never used as a fill value.
# Occurrences of `missing_string` with no usable element before them are
# left unchanged. Runs of any length are filled from the same source element.
#
# Returns a vector of the same length as `x`.
fill_missing <- function(x, missing_string = MISSING_STRING) {
  is_na <- is.na(x)
  is_gap <- is_na | x == missing_string

  # For every position, the index of the latest non-gap element at or before
  # it (0 when there is none yet): zero out the gap positions, then take the
  # running maximum.
  source_ids <- seq_along(x)
  source_ids[is_gap] <- 0L
  source_ids <- cummax(source_ids)

  # Only fill real `missing_string` entries that have something to copy from.
  fillable <- is_gap & !is_na & source_ids > 0L
  x[fillable] <- x[source_ids[fillable]]
  x
}

x <- fill_missing(x0) # x0 is kept untouched for comparison at the end

# Compare initial vs final value
cbind(x0, x)
x0 x
[1,] "1" "1"
[2,] "2" "2"
[3,] "" "2"
[4,] "3" "3"
[5,] "4" "4"
[6,] "" "4"
[7,] "" "4"
[8,] "5" "5"
[9,] "6" "6"
[10,] NA NA
[11,] "" "6"
[12,] "7" "7"
[13,] "8" "8"
[14,] "" "8"
[15,] "9" "9"
[16,] "10" "10"
[17,] "" "10"