Interesting problem. Here is a solution that could potentially be made more efficient, but it does the job.
# function from https://www.statworx.com/de/blog/strsplit-but-keeping-the-delimiter/
# Wrapper around base::strsplit() that can optionally keep the delimiter
# attached to the resulting pieces.
#
# Arguments:
#   x     - passed on to base::strsplit() as `x`.
#   split - regular expression to split on; for "before" and "after" it is
#           embedded in a zero-width look-around pattern.
#   type  - "remove": split exactly like base::strsplit();
#           "before": split in front of each delimiter match and keep it;
#           "after":  split behind each delimiter match and keep it.
#   perl  - only forwarded when type is "remove"; the other two types
#           always call base::strsplit() with perl = TRUE.
#   ...   - further arguments forwarded to base::strsplit().
#
# Returns the value of base::strsplit(); stops for any other `type`.
strsplit <- function(x,
                     split,
                     type = "remove",
                     perl = FALSE,
                     ...) {
  # plain splitting: hand everything straight to base::strsplit
  if (type == "remove") {
    return(base::strsplit(x = x, split = split, perl = perl, ...))
  }

  # delimiter-preserving modes: build the look-around pattern first
  if (type == "before") {
    # a position preceded by any character and followed by the delimiter
    keep_pattern <- paste0("(?<=.)(?=", split, ")")
  } else if (type == "after") {
    # a position directly preceded by the delimiter
    keep_pattern <- paste0("(?<=", split, ")")
  } else {
    # wrong type input
    stop("type must be remove, after or before!")
  }

  # look-arounds need the PCRE engine, hence perl = TRUE
  base::strsplit(x = x, split = keep_pattern, perl = TRUE, ...)
}
# number of seconds contained in one unit of each supported unit letter
to_seconds <- c(H = 3600, M = 60, S = 1)

# Convert amounts of time into seconds.
#   value - numeric amounts
#   unit  - unit letters used to look up the multiplier in `to_seconds`
# Returns `value` scaled by the looked-up multipliers; the result carries
# the names of the looked-up entries.
get_seconds <- function(value, unit) {
  seconds_per_unit <- to_seconds[unit]
  value * seconds_per_unit
}
# sample playtimes, each made of number + unit-letter parts (e.g. "1H18M20S")
playtimes <- c("1H18M20S", "1H27M5S", "18M27S", "56M38S", "21S")
# cut every playtime right after each unit letter, keeping the letter
times <- strsplit(x = playtimes, split = "[A-Z]", type = "after")
times
#> [[1]]
#> [1] "1H" "18M" "20S"
#>
#> [[2]]
#> [1] "1H" "27M" "5S"
#>
#> [[3]]
#> [1] "18M" "27S"
#>
#> [[4]]
#> [1] "56M" "38S"
#>
#> [[5]]
#> [1] "21S"
# Total number of seconds for every playtime in `times`.
# vapply() is used instead of sapply() so the result type is fixed: the
# inner calls always yield character vectors (sapply() would return an
# empty list for a playtime without parts, which breaks the unit lookup),
# and the outer call always yields a numeric vector, even for empty input.
vapply(times,
       function(parts) {
         # split each part into its numeric amount and its unit letter
         amount_unit <- strsplit(x = parts,
                                 split = "[A-Z]",
                                 type = "before")
         amounts <- as.numeric(
           vapply(amount_unit, `[`, FUN.VALUE = character(1), 1)
         )
         units <- vapply(amount_unit, `[`, FUN.VALUE = character(1), 2)
         # convert every part to seconds and add them up
         sum(get_seconds(value = amounts, unit = units))
       },
       FUN.VALUE = numeric(1))
#> [1] 4700 5225 1107 3398 21