1

When I want to change a column from factor to numeric with "as.numeric()", the final numbers are totally different from what I imported.

It is a problem due to the comma? Really strange...

Thanks!

dput(datatest) structure(list(HOURS.at.sea = structure(c(261L, 84L, 83L, 260L, 307L, 292L, 252L, 72L, 59L, 343L, 244L, 78L, 56L, 256L, 9L, 269L, 291L, 254L, 69L, 65L, 267L, 283L, 1L, 80L, 169L, 1L, 115L, 67L, 75L, 3L, 309L, 59L, 33L, 52L, 75L, 37L, 51L, 75L, 22L, 2L, 49L, 83L, 21L, 2L, 53L, 70L, 19L, 3L, 57L, 70L, 22L, 3L, 58L, 78L, 36L, 49L, 2L, 66L, 115L, 52L, 72L, 114L, 57L, 78L, 116L, 56L, 68L, 116L, 55L, 70L, 327L, 6L, 257L, 2L, 107L, 176L, 182L, 4L, 114L, 35L, 46L, 3L, 67L, 34L, 46L, 3L, 79L, 40L, 50L, 69L, 38L, 52L, 2L, 69L, 38L, 85L, 49L, 70L, 64L, 70L, 3L, 1L, 206L, 231L, 58L, 55L, 109L, 212L, 220L, 53L, 56L, 125L, 3L, 5L, 214L, 231L, 57L, 55L, 107L, 217L, 228L, 52L, 57L, 105L, 210L, 231L, 56L, 55L, 105L, 215L, 232L, 55L, 54L, 90L, 210L, 230L, 58L, 54L, 108L, 218L, 228L, 57L, 56L, 96L, 213L, 228L, 55L, 57L, 106L, 217L, 232L, 58L, 73L, 110L, 217L, 233L, 57L, 59L, 117L, 7L, 222L, 233L, 59L, 56L, 107L, 219L, 231L, 57L, 56L, 109L, 221L, 233L, 56L, 57L, 106L, 1L, 1L, 247L, 317L, 159L, 316L, 229L, 306L, 129L, 120L, 29L, 74L, 287L, 12L, 151L, 109L, 68L, 125L, 270L, 1L, 56L, 224L, 180L, 76L, 281L, 86L, 79L, 258L, 83L, 1L, 229L, 23L, 132L, 56L, 59L, 76L, 115L, 110L, 28L, 235L, 226L, 16L, 134L, 55L, 57L, 66L, 124L, 117L, 28L, 240L, 1L, 52L, 335L, 32L, 59L, 186L, 71L, 4L, 7L, 5L, 39L, 59L, 2L, 3L, 4L, 293L, 4L, 66L, 99L, 110L, 54L, 2L, 4L, 148L, 221L, 322L, 31L, 170L, 1L, 286L, 162L, 336L, 129L, 138L, 70L, 71L, 125L, 241L, 277L, 6L, 8L, 76L, 84L, 320L, 339L, 117L, 137L, 69L, 80L, 1L, 243L, 92L, 139L, 149L, 54L, 67L, 262L, 1L, 240L, 78L, 73L, 1L, 187L, 48L, 65L, 97L, 263L, 332L, 187L, 103L, 29L, 79L, 266L, 334L, 188L, 303L, 96L, 264L, 313L, 184L, 45L, 66L, 105L, 263L, 330L, 184L, 53L, 24L, 96L, 97L, 272L, 331L, 185L, 47L, 68L, 102L, 265L, 342L, 187L, 97L, 49L, 58L, 264L, 333L, 195L, 96L, 31L, 181L, 265L, 1L, 236L, 79L, 270L, 89L, 246L, 45L, 207L, 166L, 79L, 41L, 177L, 55L, 128L, 1L, 152L, 205L, 57L, 168L, 195L, 284L, 94L, 118L, 340L, 82L, 123L, 57L, 73L, 118L, 58L, 227L, 18L, 51L, 323L, 178L, 49L, 120L, 26L, 59L, 52L, 67L, 67L, 114L, 234L, 1L, 15L, 201L, 163L, 42L, 72L, 47L, 10L, 66L, 106L, 224L, 17L, 189L, 44L, 200L, 164L, 45L, 69L, 47L, 67L, 102L, 275L, 14L, 190L, 30L, 199L, 161L, 47L, 71L, 52L, 276L, 32L, 199L, 161L, 48L, 73L, 46L, 70L, 103L, 273L, 23L, 271L, 47L, 74L, 46L, 57L, 308L, 23L, 1L, 66L, 338L, 242L, 155L, 72L, 249L, 57L, 80L, 94L, 225L, 341L, 104L, 1L, 116L, 44L, 5L, 90L, 293L, 50L, 238L, 84L, 46L, 216L, 46L, 278L, 325L, 57L, 314L, 48L, 108L, 44L, 50L, 59L, 1L, 289L, 49L, 42L, 295L, 53L, 304L, 3L, 49L, 59L, 1L, 74L, 15L, 84L, 26L, 244L, 129L, 80L, 82L, 158L, 126L, 45L, 79L, 65L, 344L, 258L, 106L, 81L, 75L, 95L, 4L, 144L, 140L, 25L, 83L, 76L, 1L, 209L, 99L, 42L, 79L, 312L, 1L, 27L, 238L, 76L, 70L, 119L, 115L, 69L, 305L, 1L, 318L, 139L, 187L, 84L, 67L, 150L, 44L, 131L, 73L, 172L, 123L, 57L, 73L, 251L, 1L, 122L, 254L, 28L, 5L, 4L, 4L, 4L, 6L, 156L, 247L, 88L, 248L, 81L, 78L, 42L, 258L, 87L, 165L, 50L, 173L, 80L, 98L, 41L, 72L, 108L, 102L, 48L, 74L, 102L, 223L, 103L, 48L, 5L, 5L, 67L, 98L, 226L, 104L, 57L, 4L, 3L, 3L, 2L, 72L, 98L, 228L, 105L, 48L, 3L, 4L, 4L, 4L, 78L, 94L, 234L, 103L, 2L, 26L, 3L, 3L, 4L, 4L, 3L, 3L, 60L, 98L, 84L, 218L, 11L, 45L, 1L, 158L, 175L, 95L, 4L, 65L, 112L, 311L, 216L, 146L, 1L, 131L, 217L, 55L, 302L, 60L, 202L, 66L, 56L, 167L, 79L, 179L, 142L, 2L, 54L, 145L, 1L, 196L, 60L, 70L, 118L, 23L, 91L, 42L, 197L, 50L, 67L, 77L, 83L, 44L, 66L, 140L, 44L, 83L, 337L, 1L, 52L, 319L, 1L, 47L, 62L, 50L, 234L, 81L, 57L, 254L, 56L, 61L, 45L, 232L, 80L, 57L, 253L, 121L, 234L, 77L, 51L, 241L, 49L, 63L, 44L, 234L, 81L, 54L, 250L, 2L, 1L, 153L, 338L, 107L, 141L, 68L, 55L, 128L, 225L, 100L, 171L, 193L, 194L, 160L, 338L, 112L, 139L, 3L, 68L, 2L, 65L, 130L, 49L, 139L, 1L, 50L, 310L, 76L, 279L, 141L, 1L, 1L, 74L, 251L, 204L, 1L, 217L, 84L, 198L, 43L, 54L, 298L, 268L, 26L, 208L, 49L, 51L, 297L, 226L, 92L, 199L, 42L, 54L, 299L, 267L, 26L, 199L, 31L, 58L, 298L, 218L, 96L, 203L, 43L, 58L, 299L, 267L, 288L, 25L, 183L, 42L, 315L, 92L, 199L, 32L, 56L, 300L, 1L, 105L, 103L, 96L, 90L, 76L, 248L, 290L, 294L, 274L, 94L, 56L, 133L, 136L, 72L, 67L, 67L, 115L, 244L, 174L, 112L, 84L, 68L, 143L, 240L, 1L, 220L, 147L, 209L, 117L, 157L, 321L, 113L, 138L, 13L, 154L, 118L, 139L, 111L, 55L, 1L, 102L, 51L, 80L, 208L, 241L, 70L, 96L, 80L, 259L, 103L, 70L, 223L, 326L, 57L, 285L, 53L, 324L, 67L, 255L, 105L, 221L, 105L, 76L, 53L, 54L, 83L, 84L, 247L, 91L, 68L, 106L, 30L, 324L, 59L, 65L, 296L, 79L, 20L, 79L, 301L, 52L, 70L, 1L, 135L, 245L, 53L, 217L, 247L, 83L, 75L, 2L, 78L, 242L, 192L, 249L, 56L, 77L, 99L, 82L, 75L, 65L, 241L, 6L, 63L, 112L, 82L, 80L, 49L, 239L, 191L, 101L, 1L, 14L, 222L, 71L, 74L, 98L, 215L, 325L, 282L, 117L, 325L, 226L, 97L, 91L, 92L, 57L, 72L, 246L, 1L, 211L, 53L, 70L, 131L, 239L, 237L, 217L, 53L, 70L, 93L, 242L, 329L, 43L, 92L, 104L, 127L, 50L, 306L, 328L, 256L, 76L, 49L, 280L, 1L, 201L, 103L, 73L, 4L, 4L, 4L, 242L, 125L), .Label = c("#VALUE!", "0,1", "0,2", "0,3", "0,4", "0,5", "0,6", "0,8", "1,0", "1,3", "10,1", "10,2", "10,3", "10,4", "10,5", "10,8", "10,9", "1023,7", "107,5", "108,5", "108,9", "109,2", "11,0", "11,1", "11,2", "11,3", "11,4", "11,5", "11,6", "11,7", "11,8", "11,9", "110,1", "110,2", "110,4", "110,7", "110,8", "110,9", "111,1", "115,8", "12,0", "12,1", "12,2", "12,3", "12,4", "12,5", "12,6", "12,7", "12,8", "12,9", "13,0", "13,1", "13,2", "13,3", "13,4", "13,5", "13,6", "13,7", "13,8", "13,9", "132,9", "133,8", "133,9", "138,1", "14,0", "14,1", "14,2", "14,3", "14,4", "14,5", "14,6", "14,7", "14,8", "14,9", "15,0", "15,1", "15,2", "15,3", "15,4", "15,5", "15,6", "15,7", "15,8", "15,9", "156,1", "1569,2", "157,3", "157,6", "158,5", "16,0", "16,1", "16,2", "16,3", "16,4", "16,5", "16,6", "16,7", "16,8", "16,9", "162,4", "165,6", "17,0", "17,1", "17,2", "17,3", "17,4", "17,5", "17,6", "17,7", "17,8", "17,9", "18,0", "18,2", "18,3", "18,4", "18,5", "18,6", "18,7", "18,8", "18,9", "180,9", "181,4", "19,0", "19,1", "19,3", "19,4", "19,5", "19,6", "19,7", "19,8", "19,9", "2,1", "2,3", "2,5", "2,7", "2,8", "20,0", "20,1", "20,3", "20,5", "20,7", "208,1", "21,0", "21,1", "21,3", "21,5", "21,6", "21,8", "2101,8", "22,0", "22,1", "22,4", "22,5", "22,6", "225,8", "23,1", "23,2", "23,3", "23,6", "23,9", "24,3", "24,9", "25,3", "25,4", "253,7", "254,9", "255,3", "26,9", "27,7", "277,6", "278,9", "279,3", "28,0", "288,3", "29,6", "297,3", "299,6", "3,2", "3,3", "3,8", "30,3", "303,8", "32,1", "32,4", "32,5", "32,6", "32,7", "32,9", "322,0", "322,2", "326,4", "326,7", "328,9", "329,4", "33,1", "33,2", "33,3", "33,4", "33,5", "33,6", "33,7", "33,8", "33,9", "331,4", "34,0", "34,4", "34,5", "34,7", "34,9", "35,0", "35,3", "35,5", "35,6", "35,7", "35,8", "35,9", "36,0", "36,1", "36,2", "36,3", "36,4", "36,5", "36,6", "36,7", "36,8", "36,9", "37,0", "37,1", "37,2", "37,3", "37,4", "37,5", "37,6", "37,8", "37,9", "370,8", "375,6", "38,0", "38,1", "38,2", "38,3", "38,4", "38,5", "38,6", "38,7", "38,8", "38,9", "39,0", "39,1", "39,2", "39,3", "39,4", "39,6", "39,7", "39,8", "39,9", "396,2", "40,1", "40,2", "40,3", "40,7", "40,9", "41,3", "41,4", "41,6", "41,7", "41,8", "41,9", "42,7", "42,8", "421,0", "43,5", "43,7", "433,0", "44,2", "446,1", "448,9", "449,3", "45,4", "45,8", "47,7", "472,8", "49,2", "492,3", "494,9", "497,9", "507,6", "520,8", "54,4", "541,6", "575,4", "577,4", "6,6", "6,8", "60,0", "61,2", "61,4", "61,5", "61,6", "61,7", "61,9", "619,1", "62,3", "62,9", "63,0", "63,5", "64,0", "64,1", "64,6", "642,8", "646,2", "65,8", "663,5", "666,2", "671,6", "7,2", "7,8", "708,7", "711,6", "8,1", "8,2", "8,3", "8,5", "8,7", "8,8", "8,9", "804,0", "829,0", "856,0", "87,0", "87,4", "87,5", "88,1", "88,3", "9,0", "9,1", "9,3", "9,5", "9,7", "9,8", "9,9", "90,9", "928,2", "975,8"), class = "factor")), .Names = "HOURS.at.sea", class = "data.frame", row.names = c(NA, -913L))

miken32
  • 42,008
  • 16
  • 111
  • 154
nif
  • 39
  • 1
  • 1
  • 8
  • Comments are not for extended discussion; this conversation has been [moved to chat](http://chat.stackoverflow.com/rooms/66565/discussion-on-question-by-nif-factor-to-numeric-data-are-totally-different). – Taryn Dec 10 '14 at 11:21

1 Answers1

8

If the commas are thousands separators, a common problem when users save a formatted Excel spreadsheet as *.csv and then try to import it into R, then you have two problems. Commas are not allowed in numbers in R (or they are interpreted as decimal point - depends on your locale setting) so, e.g. as.numeric("1,000") will return NA, not 1000. You have to get rid of the commas and also convert from factor to numeric.

a <- factor(c("10","20","30","40","50"))
as.numeric(a)               # returns the factor codes!!
# [1] 1 2 3 4 5
as.numeric(as.character(a)) # returns the factor levels, as numeric
# [1] 10 20 30 40 50

b <- factor(c(10,20,30,40,50))
as.numeric(b)               # returns the factor codes!!
# [1] 1 2 3 4 5

c <- factor(c("1,000","2,000","3,000","4,000","5,000"))
as.numeric(c)               # returns the factor codes
# [1] 1 2 3 4 5
as.numeric(as.character(c)) # returns NA - commas are NOT allowed in numbers in R
# [1] NA NA NA NA NA
# Warning message:
# NAs introduced by coercion 
as.numeric(gsub(",","",c,fixed=TRUE))
# [1] 1000 2000 3000 4000 5000

In the last line, gsub(",","",c,fixed=TRUE) just removes the commas.

EDIT As @CarlWitthoft points out in the comment, if the commas are decimal separators, you can import the data using:

df <- read.csv("mydata.csv", dec=",")

This will avoid a lot of problems later. Given the (already imported) dataset you present in the question, this will fix it:

datatest$HOURS.at.sea <- as.numeric(gsub(",",".",datatest$HOURS.at.sea,fixed=TRUE))

This replaces the "," with ".". Since gsub(...) returns a character vector, not a factor, you can use as.numeric(...) directly on that. Notice that you still get some NAs, because some of the rows have "#VALUE!" - looks like an Excel dump.

jlhoward
  • 58,004
  • 7
  • 97
  • 140
  • That's not strictly correct. "commas vs. periods" depends on the your Locale setting. – Carl Witthoft Dec 10 '14 at 15:39
  • @CarlWitthoft Yes. I posted this about an hour before OP added the `dput`, and assumed the reference to commas was to thousands separator. This is a common problem when exporting formatted Excel docs to csv and importing that into R. In OP's case it looks like they have commas as the decimal separator. I'll edit the answer. – jlhoward Dec 10 '14 at 17:54
  • Thanks for the update, but no thanks for criminally misspelling my name :-) – Carl Witthoft Dec 10 '14 at 21:24
  • 1
    @CarlWitthoft Sorry. Edited. – jlhoward Dec 10 '14 at 21:29