I can't quite seem to figure out the error of my code. I have 4 columns of 2 pairs. Each pair is a numeric column and a character column. The character column determines an exponent which the numeric column's values are then multiplied by 10 to the power of this exponent. I've been trying to convert the column, but I keep getting the same error no matter which way I try it. Here's my code:
library(tidyverse)
# file downloading DATA PROCESSING
url <- c("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
"https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf",
"https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2FNCDC%20Storm%20Events-FAQ%20Page.pdf")
if (!file.exists("rawdata.csv.bz2")){
download.file(url = url[1],
destfile = "rawdata.csv.bz2")
}
if (!file.exists("rawdata_doc.pdf")){
download.file(url = url[2],
destfile = "rawdata_doc.doc")
}
if (!file.exists("rawdata_faq.pdf")){
download.file(url = url[3],
destfile = "rawdata_faq.pdf")
}
#read data
stormdata <- read.csv("rawdata.csv.bz2")
stormdata_orderedevtype <- group_by(stormdata, EVTYPE, .drop = FALSE)
for (i in nrow(stormdata_orderedevtype)){
if (stormdata_orderedevtype[i, "PROPDMGEXP"] %in% letters){
print(stormdata_orderedevtype[i, "PROPDMGEXP"])
stormdata_orderedevtype[i, "PROPDMGEXP"] <- toupper(stormdata_orderedevtype[i, "PROPDMGEXP"])
}
}
convert_to_exponent <- function(value, exponent){
exponentials_numeric <- as.character(1:9)
exponentials_string <- c(" ", "H", "K", rep(" ", 2), "M", rep(" ", 2), "B")
if (exponent %in% exponentials_numeric){
exponent <- which(exponentials_numeric == exponent)
} else if (exponent %in% LETTERS){
exponent <- which(exponentials_string == exponent)
} else {
exponent <- 0L
}
value <- value * (10^exponent)
return(value)
}
for (i in nrow(stormdata_orderedevtype)){
stormdata_orderedevtype[i, "PROPDMG"] <- convert_to_exponent(stormdata_orderedevtype[i, "PROPDMG"],
stormdata_orderedevtype[i, "PROPDMGEXP"])
stormdata_orderedevtype[i, "CROPDMG"] <- convert_to_exponent(stormdata_orderedevtype[i, "CROPDMG"],
stormdata_orderedevtype[i, "CROPDMGEXP"])
}
This is the error that I get:
Error: Assigned data `value` must be compatible with row subscript `i`.
x 1 row must be assigned.
x Assigned data has 0 rows.
i Row updates require a list value. Do you need `list()` or `as.list()`?
This is the output to str for the dataframe.
> str(stormdata_orderedevtype)
tibble [902,297 x 37] (S3: grouped_df/tbl_df/tbl/data.frame)
$ STATE__ : num [1:902297] 1 1 1 1 1 1 1 1 1 1 ...
$ BGN_DATE : chr [1:902297] "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
$ BGN_TIME : chr [1:902297] "0130" "0145" "1600" "0900" ...
$ TIME_ZONE : chr [1:902297] "CST" "CST" "CST" "CST" ...
$ COUNTY : num [1:902297] 97 3 57 89 43 77 9 123 125 57 ...
$ COUNTYNAME: chr [1:902297] "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
$ STATE : chr [1:902297] "AL" "AL" "AL" "AL" ...
$ EVTYPE : chr [1:902297] "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
$ BGN_RANGE : num [1:902297] 0 0 0 0 0 0 0 0 0 0 ...
$ BGN_AZI : chr [1:902297] "" "" "" "" ...
$ BGN_LOCATI: chr [1:902297] "" "" "" "" ...
$ END_DATE : chr [1:902297] "" "" "" "" ...
$ END_TIME : chr [1:902297] "" "" "" "" ...
$ COUNTY_END: num [1:902297] 0 0 0 0 0 0 0 0 0 0 ...
$ COUNTYENDN: logi [1:902297] NA NA NA NA NA NA ...
$ END_RANGE : num [1:902297] 0 0 0 0 0 0 0 0 0 0 ...
$ END_AZI : chr [1:902297] "" "" "" "" ...
$ END_LOCATI: chr [1:902297] "" "" "" "" ...
$ LENGTH : num [1:902297] 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
$ WIDTH : num [1:902297] 100 150 123 100 150 177 33 33 100 100 ...
$ F : int [1:902297] 3 2 2 2 2 2 2 1 3 3 ...
$ MAG : num [1:902297] 0 0 0 0 0 0 0 0 0 0 ...
$ FATALITIES: num [1:902297] 0 0 0 0 0 0 0 0 1 0 ...
$ INJURIES : num [1:902297] 15 0 2 2 2 6 1 0 14 0 ...
$ PROPDMG : num [1:902297] 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
$ PROPDMGEXP: chr [1:902297] "K" "K" "K" "K" ...
$ CROPDMG : num [1:902297] 0 0 0 0 0 0 0 0 0 0 ...
$ CROPDMGEXP: chr [1:902297] "" "" "" "" ...
$ WFO : chr [1:902297] "" "" "" "" ...
$ STATEOFFIC: chr [1:902297] "" "" "" "" ...
$ ZONENAMES : chr [1:902297] "" "" "" "" ...
$ LATITUDE : num [1:902297] 3040 3042 3340 3458 3412 ...
$ LONGITUDE : num [1:902297] 8812 8755 8742 8626 8642 ...
$ LATITUDE_E: num [1:902297] 3051 0 0 0 0 ...
$ LONGITUDE_: num [1:902297] 8806 0 0 0 0 ...
$ REMARKS : chr [1:902297] "" "" "" "" ...
$ REFNUM : num [1:902297] 1 2 3 4 5 6 7 8 9 10 ...
- attr(*, "groups")= tibble [985 x 2] (S3: tbl_df/tbl/data.frame)
..$ EVTYPE: chr [1:985] " HIGH SURF ADVISORY" " COASTAL FLOOD" " FLASH FLOOD" " LIGHTNING" ...
..$ .rows : list<int> [1:985]
.. ..$ : int 448452
.. ..$ : int 265835
.. ..$ : int 408333
.. ..$ : int 311103
.. ..$ : int [1:4] 286077 354105 354972 384796
.. ..$ : int 314006
.. ..$ : int 418263
.. ..$ : int 377549
.. ..$ : int 246124
.. ..$ : int [1:4] 317134 318248 322498 327548
.. ..$ : int [1:2] 416341 489975
.. ..$ : int 453797
.. ..$ : int [1:4] 427306 427327 434840 434857
.. ..$ : int [1:6] 206503 245650 246814 291162 307731 308335
.. ..$ : int 189191
.. ..$ : int [1:103] 451315 468699 488146 488631 488996 488997 489017 489019 489091 489099 ...
.. ..$ : int [1:174] 647990 648794 648795 648796 649612 649792 651163 651164 651166 651175 ...
.. ..$ : int 214947
.. ..$ : int [1:386] 188986 197154 198144 198145 198146 198178 198179 245870 251401 263662 ...
.. ..$ : int 231124
.. ..$ : int 253531
.. ..$ : int [1:3] 312547 359347 435864
.. ..$ : int 232610
.. ..$ : int [1:2] 239662 239663
.. ..$ : int [1:2] 217289 230924
.. ..$ : int 324075
.. ..$ : int [1:3] 324078 330917 330921
.. ..$ : int [1:3] 267120 278570 297296
.. ..$ : int [1:14] 284177 296232 296804 301080 324113 339059 345011 383053 417601 435937 ...
.. ..$ : int [1:2719] 188774 188777 188792 188794 188795 188796 188797 188799 188809 188834 ...
.. ..$ : int [1:2] 210552 233564
.. ..$ : int 210522
.. ..$ : int 298058
.. ..$ : int 188776
.. ..$ : int 209769
.. ..$ : int [1:2] 192949 216769
.. ..$ : int 188919
.. ..$ : int 214966
.. ..$ : int 296257
.. ..$ : int 284189
.. ..$ : int [1:4] 192274 192276 438115 443080
.. ..$ : int [1:2] 256617 298069
.. ..$ : int [1:3] 279405 295188 309320
.. ..$ : int [1:12] 192993 192995 198083 198086 198092 198187 198194 210561 221430 230916 ...
.. ..$ : int 201215
.. ..$ : int [1:2] 210565 216244
.. ..$ : int 233585
.. ..$ : int 188780
.. ..$ : int [1:3] 248509 248511 435512
.. ..$ : int 246724
.. ..$ : int 330957
.. ..$ : int 324061
.. ..$ : int [1:6] 277743 277744 277745 277747 277759 304936
.. ..$ : int [1:650] 188818 188855 189047 189076 192338 192652 192657 194072 194082 194084 ...
.. ..$ : int [1:2] 277749 277750
.. ..$ : int [1:38] 252497 252649 252658 252670 252672 259875 259876 259916 259969 265511 ...
.. ..$ : int [1:143] 194119 194358 194390 207191 207718 208321 208734 208738 216667 217312 ...
.. ..$ : int [1:5] 313897 313902 330960 351805 365203
.. ..$ : int [1:2] 260030 265426
.. ..$ : int [1:8] 252625 252628 265818 265826 266920 266923 284073 296262
.. ..$ : int [1:2] 208154 220874
.. ..$ : int [1:2] 217276 217324
.. ..$ : int 252631
.. ..$ : int 296868
.. ..$ : int [1:10] 273552 279955 289917 297480 298493 298750 307694 307791 308267 308367
.. ..$ : int [1:72] 188094 188504 190675 194047 194049 194087 194088 194089 194416 195025 ...
.. ..$ : int [1:4] 200790 201131 201821 202203
.. ..$ : int [1:2] 201031 209197
.. ..$ : int 202181
.. ..$ : int [1:6] 290348 298653 307784 308345 308347 308368
.. ..$ : int 308606
.. ..$ : int 282923
.. ..$ : int 199738
.. ..$ : int [1:2] 279968 279975
.. ..$ : int [1:4] 323511 323971 330779 330781
.. ..$ : int [1:3] 198727 209859 213992
.. ..$ : int [1:4] 232880 232881 232991 451320
.. ..$ : int [1:6] 323972 330782 499070 499072 506532 506533
.. ..$ : int [1:539] 570800 570801 587891 593049 593050 601130 606119 606333 606441 606460 ...
.. ..$ : int 232613
.. ..$ : int 198198
.. ..$ : int 350345
.. ..$ : int [1:2] 417608 434951
.. ..$ : int [1:4] 283093 283094 297373 392225
.. ..$ : int 221549
.. ..$ : int [1:2] 252814 252818
.. ..$ : int [1:6] 197133 198114 202400 204463 216221 252788
.. ..$ : int 213456
.. ..$ : int [1:1293] 187617 189609 192264 192273 192277 192278 192279 192280 192281 192283 ...
.. ..$ : int [1:10] 626160 649578 677178 685299 725456 733336 780035 784174 839525 848498
.. ..$ : int [1:2] 192630 269336
.. ..$ : int [1:2] 192661 216980
.. ..$ : int 359911
.. ..$ : int 265184
.. ..$ : int [1:2488] 194361 194362 197784 197786 197791 199733 208576 208578 208611 215082 ...
.. ..$ : int [1:13] 206504 208540 208547 208554 208561 208587 208590 208599 208602 208607 ...
.. ..$ : int 474065
.. ..$ : int [1:9] 194403 377950 382045 382132 415947 416285 416435 450916 452928
.. ..$ : int [1:6] 359361 359395 364829 364839 427331 434863
.. .. [list output truncated]
.. ..@ ptype: int(0)
..- attr(*, ".drop")= logi FALSE
This is the output for dput().
dput(stormdata_orderedevtype[1:20, ])
structure(list(STATE__ = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1), BGN_DATE = c("4/18/1950 0:00:00", "4/18/1950 0:00:00",
"2/20/1951 0:00:00", "6/8/1951 0:00:00", "11/15/1951 0:00:00",
"11/15/1951 0:00:00", "11/16/1951 0:00:00", "1/22/1952 0:00:00",
"2/13/1952 0:00:00", "2/13/1952 0:00:00", "2/13/1952 0:00:00",
"2/13/1952 0:00:00", "2/13/1952 0:00:00", "2/29/1952 0:00:00",
"3/3/1952 0:00:00", "3/22/1952 0:00:00", "4/4/1952 0:00:00",
"5/10/1952 0:00:00", "5/10/1952 0:00:00", "5/10/1952 0:00:00"
), BGN_TIME = c("0130", "0145", "1600", "0900", "1500", "2000",
"0100", "0900", "2000", "2000", "2030", "2030", "2130", "1700",
"1310", "1500", "0620", "0900", "0900", "0900"), TIME_ZONE = c("CST",
"CST", "CST", "CST", "CST", "CST", "CST", "CST", "CST", "CST",
"CST", "CST", "CST", "CST", "CST", "CST", "CST", "CST", "CST",
"CST"), COUNTY = c(97, 3, 57, 89, 43, 77, 9, 123, 125, 57, 43,
9, 73, 49, 107, 103, 97, 57, 57, 57), COUNTYNAME = c("MOBILE",
"BALDWIN", "FAYETTE", "MADISON", "CULLMAN", "LAUDERDALE", "BLOUNT",
"TALLAPOOSA", "TUSCALOOSA", "FAYETTE", "CULLMAN", "BLOUNT", "JEFFERSON",
"DEKALB", "PICKENS", "MORGAN", "MOBILE", "FAYETTE", "FAYETTE",
"FAYETTE"), STATE = c("AL", "AL", "AL", "AL", "AL", "AL", "AL",
"AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",
"AL", "AL"), EVTYPE = c("TORNADO", "TORNADO", "TORNADO", "TORNADO",
"TORNADO", "TORNADO", "TORNADO", "TORNADO", "TORNADO", "TORNADO",
"TORNADO", "TORNADO", "TORNADO", "TORNADO", "TORNADO", "TORNADO",
"TORNADO", "TORNADO", "TORNADO", "TORNADO"), BGN_RANGE = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), BGN_AZI = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", ""), BGN_LOCATI = c("", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", ""), END_DATE = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", ""), END_TIME = c("", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", ""), COUNTY_END = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), COUNTYENDN = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), END_RANGE = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), END_AZI = c("", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", ""), END_LOCATI = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", ""), LENGTH = c(14, 2, 0.1, 0, 0, 1.5, 1.5, 0, 3.3, 2.3,
1.3, 4.7, 0, 3.3, 3.3, 21.6, 0.2, 0.2, 0.2, 0.2), WIDTH = c(100,
150, 123, 100, 150, 177, 33, 33, 100, 100, 400, 400, 200, 400,
440, 100, 200, 50, 50, 50), F = c(3L, 2L, 2L, 2L, 2L, 2L, 2L,
1L, 3L, 3L, 1L, 1L, 3L, 3L, 3L, 4L, 1L, 1L, 1L, 1L), MAG = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), FATALITIES = c(0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0), INJURIES = c(15,
0, 2, 2, 2, 6, 1, 0, 14, 0, 3, 3, 26, 12, 6, 50, 2, 0, 0, 0),
PROPDMG = c(25, 2.5, 25, 2.5, 2.5, 2.5, 2.5, 2.5, 25, 25,
2.5, 2.5, 250, 0, 25, 25, 25, 25, 25, 25), PROPDMGEXP = c("K",
"K", "K", "K", "K", "K", "K", "K", "K", "K", "M", "M", "K",
"K", "K", "K", "K", "K", "K", "K"), CROPDMG = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), CROPDMGEXP = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", ""), WFO = c("", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", ""), STATEOFFIC = c("",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", ""), ZONENAMES = c("", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", ""), LATITUDE = c(3040,
3042, 3340, 3458, 3412, 3450, 3405, 3255, 3334, 3336, 3401,
3402, 3336, 3430, 3320, 3436, 3042, 3344, 3344, 3344), LONGITUDE = c(8812,
8755, 8742, 8626, 8642, 8748, 8631, 8558, 8740, 8738, 8645,
8644, 8656, 8542, 8754, 8700, 8817, 8745, 8745, 8745), LATITUDE_E = c(3051,
0, 0, 0, 0, 0, 0, 0, 3336, 3337, 3402, 3404, 0, 3432, 3322,
3441, 0, 0, 0, 0), LONGITUDE_ = c(8806, 0, 0, 0, 0, 0, 0,
0, 8738, 8737, 8644, 8640, 0, 8540, 8752, 8638, 0, 0, 0,
0), REMARKS = c("", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", ""), REFNUM = c(1, 2, 3,
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20)), row.names = c(NA, -20L), groups = structure(list(EVTYPE = "TORNADO",
.rows = structure(list(1:20), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = 1L, class = c("tbl_df",
"tbl", "data.frame"), .drop = FALSE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
I'm sorry I cannot provide a more detailed question because I don't know where exactly I'm going wrong in the first place. I tried googling the error message but I can't seem to get an exact match. I would appreciate any advice, thank you very much for your help.