1

I have a df 'players' that features a column of strings representing numeric values. Here is a sample of how the df looks:

Name                  Value
Mikael Forssell      9,00 Mill. €      
Hernan Crespo        15,00 Mill. €
Nuno Morais          1,00 Mill. €
Alex                 10,00 Mill. €
Filipe Oliveira      450 Th. €
Craig Rocastle       100 Th. €
Wayne Bridge         7,75 Mill. €
Jiri Jarosik         6,50 Mill €
Joe Keenan           600 Th. €

Is there any way I can set the values to equal their actual value in their most basic integer format, so that the output could look something like this:

Name                  Value
Mikael Forssell      9000000      
Hernan Crespo        15000000
Nuno Morais          1000000
Alex                 10000000
Filipe Oliveira      450000
Craig Rocastle       100000
Wayne Bridge         7750000
Jiri Jarosik         6500000
Joe Keenan           600000
zx8754
  • 52,746
  • 12
  • 114
  • 209
Paul R
  • 79
  • 2
  • 7

3 Answers

1

Here is one option using stringr:

# Packages used
library(stringr) # for str_extract and str_replace (could be done in base with gsub)
library(magrittr) # for the pipe operator %>%

# Unit abbreviation: the first run of ASCII letters in the string.
# "[A-Za-z]" is used instead of "[A-z]" because the latter also matches the
# punctuation characters that sit between "Z" and "a" ([ \ ] ^ _ `).
players$Measure <- str_extract(players$Value, "[A-Za-z]+")

# Numeric part: digits with a decimal comma, converted to a decimal point so
# that as.numeric() can parse it.
players$Value_clean <-
  players$Value %>%
  str_extract("[\\d,]+") %>%
  str_replace(fixed(","), ".") %>%
  as.numeric()

# Scale by an explicit unit lookup. An unrecognised (or missing) unit yields
# NA instead of being silently treated as millions.
unit_multiplier <- c(Th = 1e3, Mill = 1e6)
players$Value_clean <-
  players$Value_clean * unname(unit_multiplier[players$Measure])

players
#              Name         Value Measure Value_clean
# 1 Mikael Forssell  9,00 Mill. €    Mill     9000000
# 2   Hernan Crespo 15,00 Mill. €    Mill    15000000
# 3     Nuno Morais  1,00 Mill. €    Mill     1000000
# 4            Alex 10,00 Mill. €    Mill    10000000
# 5 Filipe Oliveira     450 Th. €      Th      450000
# 6  Craig Rocastle     100 Th. €      Th      100000
# 7    Wayne Bridge  7,75 Mill. €    Mill     7750000
# 8    Jiri Jarosik   6,50 Mill €    Mill     6500000
# 9      Joe Keenan     600 Th. €      Th      600000

Data

# Sample data from the question: one row per player, with the market value
# kept as the raw text (decimal comma, unit abbreviation, currency sign).
players <- data.frame(
  Name = c(
    "Mikael Forssell",
    "Hernan Crespo",
    "Nuno Morais",
    "Alex",
    "Filipe Oliveira",
    "Craig Rocastle",
    "Wayne Bridge",
    "Jiri Jarosik",
    "Joe Keenan"
  ),
  Value = c(
    "9,00 Mill. €",
    "15,00 Mill. €",
    "1,00 Mill. €",
    "10,00 Mill. €",
    "450 Th. €",
    "100 Th. €",
    "7,75 Mill. €",
    "6,50 Mill €",
    "600 Th. €"
  ),
  # Keep both columns as character vectors rather than factors.
  stringsAsFactors = FALSE
)
s_baldur
  • 29,441
  • 4
  • 36
  • 69
1

Here is the start:

x <- c("9,00 Mill. €", "15,00 Mill. €", "450 Th. €", "600 Th. €")

# Multiplier for each unit abbreviation. The trailing "." is stripped before
# the lookup, so both "Mill." and "Mill" resolve to the same entry.
unit_multiplier <- c(Th = 1e3, Mill = 1e6)

# Each string is split into "<amount> <unit> <currency>" and converted to its
# plain numeric value. Unknown units produce NA.
values <- vapply(strsplit(x, " ", fixed = TRUE), function(parts) {
  # The amount uses a decimal comma: convert it to a decimal point rather
  # than deleting it, otherwise "9,00" would be read as 900.
  amount <- as.numeric(sub(",", ".", parts[1], fixed = TRUE))
  unit <- sub(".", "", parts[2], fixed = TRUE)
  amount * unname(unit_multiplier[unit])
}, numeric(1))
values
zx8754
  • 52,746
  • 12
  • 114
  • 209
0

Here is one solution using case_when and stringr

library(dplyr)
library(stringr)

# Convert the textual market value (e.g. "7,75 Mill. €", "450 Th. €") into a
# plain number. Operates on the `players` data frame from the question and
# stores the result back, so the conversion is not lost after printing.
players <- players %>%
  mutate(
    # The leading token is the amount; swap the decimal comma for a point so
    # as.numeric() can parse it.
    amount = as.numeric(
      str_replace(str_extract(Value, "^[^ ]*"), fixed(","), ".")
    ),
    # Units are matched literally (no regex wildcard) and without the
    # trailing dot, so "Mill." and "Mill" are both recognised. Rows with any
    # other unit become NA.
    Value = case_when(
      str_detect(Value, fixed("Mill")) ~ amount * 1e6,
      str_detect(Value, fixed("Th")) ~ amount * 1e3
    )
  ) %>%
  select(-amount)

players
Daniel
  • 2,207
  • 1
  • 11
  • 15