The main thing to guard against when using letters is having more than 26 providers. Options: if there are 52 or fewer providers, then `c(LETTERS, letters)` may suffice; add another 10 with `c(LETTERS, letters, 0:9)`; or go with `num2alpha` (below) for an arbitrary number-to-multi-letter solution.
#' Convert numbers to spreadsheet-style letter labels
#'
#' Maps 1 -> "a", 2 -> "b", ..., 26 -> "z", 27 -> "aa", 28 -> "ab", and so on
#' (bijective base-`length(chr)` numbering), vectorised over `num`.
#'
#' @param num Numeric vector. Values are converted with
#'   `as.integer(abs(num))`; negative inputs get a leading `"-"`.
#' @param chr Character vector of symbols to use as the alphabet; must have
#'   at least two elements.
#' @param zero String returned for inputs that convert to 0.
#' @param sep String inserted between the symbols of a multi-symbol label.
#'   It is taken literally (never interpreted as a regular expression).
#' @return A character vector the same length as `num`, `NA` where the
#'   converted `num` is `NA`.
num2alpha <- function(num, chr = letters, zero = "", sep = "") {
  len <- length(chr)
  stopifnot(len > 1)
  signs <- ifelse(!is.na(num) & sign(num) < 0, "-", "")
  num <- as.integer(abs(num))
  is0 <- !is.na(num) & num == 0L
  # Entries that will receive at least one symbol (and so a trailing `sep`).
  has_symbols <- !is.na(num) & num > 0L
  out <- character(length(num))
  not0 <- has_symbols
  while (any(not0)) {
    # Prepend the least-significant symbol, then shift to the next position.
    out[not0] <- paste0(chr[(num[not0] - 1L) %% len + 1L], sep, out[not0])
    num[not0] <- (num[not0] - 1L) %/% len
    not0 <- !is.na(num) & num > 0L
  }
  if (nzchar(sep)) {
    # Every built label ends in one surplus `sep`; drop it by character count
    # so that separators such as "|", "+" or "(" are not misread as regex.
    built <- out[has_symbols]
    out[has_symbols] <- substr(built, 1L, nchar(built) - nchar(sep))
  }
  out[is0] <- zero
  out[is.na(num)] <- NA
  keep <- !is.na(out)
  out[keep] <- paste0(signs[keep], out[keep])
  out
}
data.table
library(data.table)
# match() against unique() numbers each provider by order of first appearance,
# so a repeated ProviderID always maps to the same number (and thus letter).
# NOTE(review): DF is not defined in this snippet; assumed to be a data.frame
# with a ProviderID column -- confirm against the question's data.
as.data.table(DF)[, letter := num2alpha(match(ProviderID, unique(ProviderID)))]
# ProviderID letter
# <char> <char>
# 1: Harry Potter a
# 2: Hermione Granger b
# 3: Ron Weasley c
# 4: Harry Potter a
# 5: Hermione Granger b
You can use upper-case letters with `chr = LETTERS`:
# chr = LETTERS replaces the default lower-case alphabet, giving upper-case labels.
as.data.table(DF)[, letter := num2alpha(match(ProviderID, unique(ProviderID)), chr = LETTERS)]
# ProviderID letter
# <char> <char>
# 1: Harry Potter A
# 2: Hermione Granger B
# 3: Ron Weasley C
# 4: Harry Potter A
# 5: Hermione Granger B
I think the use of `rleid` here is ill-advised, as `Harry Potter` on non-consecutive rows would be given different `letter`s.
dplyr
library(dplyr)
# Number each provider by order of first appearance, then turn the numbers
# into letters; repeated providers reuse the same letter.
mutate(
  DF,
  letter = num2alpha(match(ProviderID, unique(ProviderID)))
)
base R
# Number each provider by order of first appearance, then turn the numbers
# into letters; repeated providers reuse the same letter.
DF[["letter"]] <- num2alpha(
  match(DF[["ProviderID"]], unique(DF[["ProviderID"]]))
)