0

I have used `dput()` to share two columns from my data frame here:

dput(data)
structure(list(FID = c(859L, 860L, 863L, 865L, 866L, 867L, 869L, 
870L, 871L, 872L, 873L, 1039L, 1041L, 1043L, 1044L, 1045L, 1046L, 
1048L, 1049L, 1050L, 1087L, 1088L, 1089L, 1103L, 1113L, 1114L, 
1238L, 1244L, 1247L, 1248L, 1249L, 1251L, 1252L, 1253L, 1255L, 
1256L, 1257L, 1260L, 1262L, 1263L, 1264L, 1265L, 1303L, 1304L, 
1305L, 1310L, 1311L, 1312L, 1313L, 1314L, 1315L, 1319L, 1320L, 
1321L, 1322L, 1323L, 1324L, 1325L, 1326L, 1327L, 1328L, 1336L, 
1337L, 1338L, 1339L, 1340L, 1341L, 1345L, 1346L, 1348L, 1393L, 
1395L, 1396L, 1397L, 1398L, 1399L, 1400L, 1401L, 1402L, 1406L, 
1407L, 1408L, 1409L, 1434L, 1437L, 1438L, 1441L, 1476L, 1478L, 
1479L, 1486L, 1488L, 1489L, 1535L, 1539L, 1540L, 1541L, 1655L, 
1656L, 1657L, 1658L, 1659L, 1660L, 1661L, 1662L, 1663L, 1664L, 
1665L, 1666L, 1667L, 1668L, 1669L, 1670L, 1671L, 1672L, 1684L, 
1733L, 1735L, 1736L, 1737L, 1738L, 1739L, 1740L, 1742L, 1743L, 
1805L, 1829L, 1830L, 1831L, 1834L, 1835L, 1843L, 1847L, 1848L, 
1860L, 1861L, 1862L, 1863L, 1864L, 1865L, 1866L, 1867L, 1868L, 
1869L, 1870L, 1871L, 1872L, 1873L, 1874L, 1875L, 1876L, 1877L, 
1878L, 1905L, 1906L, 1907L, 1908L, 1909L, 1910L, 1911L, 1912L, 
1913L, 1914L, 1918L, 1919L, 1920L, 1921L, 1922L, 1923L, 1924L, 
1925L, 1926L, 1927L, 1928L, 10284L, 10308L, 10309L, 10310L, 10311L, 
10312L, 10313L, 10314L, 10315L, 10316L, 10325L, 10327L, 10574L, 
10576L, 10577L, 10578L, 10579L, 10582L, 10583L, 10584L, 10585L, 
10586L, 10587L, 10588L, 10589L, 10590L, 10591L, 10592L, 10593L, 
10594L, 10595L, 10596L, 10597L, 10598L, 10599L, 10602L, 10611L, 
10612L, 10613L, 10614L, 10633L, 10685L, 10686L, 10867L, 10877L, 
10903L, 10920L, 10921L, 10922L, 10923L, 10924L, 10925L, 10926L, 
10927L), STAID = c(1484812, 1484938, 1485755, 1487000, 1487170, 
1488110, 1490000, 1491000, 1491500, 1492500, 1492600, 1576045, 
1576754, 1576787, 1577500, 1578310, 1578475, 1580520, 1580620, 
1580700, 1589025, 1589035, 1589100, 1589795, 1594440, 1594526, 
1653600, 1658000, 1660920, 1661050, 1661500, 1662800, 1663500, 
1664000, 1666500, 1667500, 1668000, 1670060, 1671020, 1671025, 
1671100, 1672500, 2029000, 2030000, 2030500, 2034000, 2035000, 
2036500, 2037000, 2037500, 2037705, 2040000, 2040892, 2041000, 
2041650, 2042500, 2042770, 2042822, 2042893, 2042928, 2043155, 
2047000, 2047370, 2047500, 2047783, 2049500, 2050000, 2052000, 
2052090, 2053500, 2080500, 2081022, 2081028, 2081054, 2081094, 
2081500, 2081747, 2081942, 2082585, 2083500, 2084000, 2084160, 
2084472, 2089500, 2091500, 2091814, 2092576, 2105769, 2108000, 
2108566, 2110725, 2110802, 2110815, 2135200, 2136030, 2136350, 
2136361, 2171000, 2171001, 2171500, 2171635, 2171645, 2171700, 
2171800, 2171850, 2171905, 2172000, 2172001, 2172002, 2172020, 
2172035, 2172040, 2172050, 2172053, 2172081, 2175000, 2200120, 
2201230, 2202040, 2202190, 2202500, 2202600, 2202680, 2203518, 
2203536, 2215500, 2224500, 2224940, 2225000, 2226000, 2226160, 
2228295, 2231254, 2231291, 2233475, 2233484, 2233500, 2234000, 
2234308, 2234324, 2234344, 2234384, 2234400, 2234435, 2234500, 
2234990, 2234991, 2235000, 2235200, 2235500, 2236000, 2236125, 
2236160, 2240500, 2243000, 2243959, 2243960, 2244040, 2244333, 
2244440, 2245260, 2245290, 2245340, 2246160, 2246318, 2246459, 
2246500, 2246515, 2246518, 2246621, 2246751, 2246804, 2246825, 
2247222, 21108125, 21720368, 21720508, 21720677, 21720698, 21720709, 
21720710, 21720711, 21720825, 21720869, 22035975, 22462002, 167300055, 
167862550, 167889257, 167891721, 167892964, 204279245, 204279294, 
204288539, 204288721, 204288771, 204288786, 204288831, 204288905, 
204289131, 204289402, 204289985, 204289989, 204289994, 204291317, 
204292275, 204293125, 204295063, 204295505, 204306533, 208062765, 
208111310, 208114150, 208250410, 209205053, 217206935, 217206962, 
301124081395901, 302309081333001, 330428079214800, 363342076261100, 
364145076245400, 364259076262300, 364312076211800, 364314076211900, 
364336076231300, 364336076231400, 364355076245000)), class = "data.frame", row.names = c(NA, 
-228L))

I needed to add a leading zero to the values for data$STAID in rows 1:217, which I did like so:

library(dplyr)

# Prefix the station IDs in rows 1-217 with a leading zero; the remaining rows
# (the 15-digit IDs at the end of the table) are left unprefixed.
#
# STAID is an identifier, not a quantity, so it is stored as character: a
# numeric vector cannot hold leading zeros, and as.numeric() will always drop
# them. Keep the column as character from here on.
data <- data %>%
  mutate(
    # Convert to text explicitly. "%.0f" always prints plain digits, whereas
    # the implicit numeric-to-character coercion done by ifelse()/paste0() may
    # switch to scientific notation for round values (e.g. 1e+06).
    STAID = if (is.numeric(STAID)) {
      sprintf("%.0f", STAID)
    } else {
      as.character(STAID)
    },
    # if_else() requires both branches to have the same type, so a stray
    # numeric branch is reported as an error instead of being silently coerced.
    STAID = if_else(row_number() <= 217, paste0("0", STAID), STAID)
  )

But when I did that, it changed data$STAID to character data again. Now when I do data$STAID <- as.numeric(data$STAID) to make it numeric, it takes the leading zeros away. How do you retain the leading zeros and change the class to numeric?

Ryan
  • 1,048
  • 7
  • 14
  • 5
    Numerics are not printed with leading 0s. If you want the leading 0s to be part of the data, then you need to convert to `character`. If you'd like to explore other options, please provide a bit more info about why you feel you need both `numeric` class and leading zeros. – Gregor Thomas Aug 26 '20 at 18:54
  • 3
    I'd also recommend using `formatC()` or `sprintf()` instead of `ifelse` and `paste`. Lots of explanation at the [FAQ about adding leading zeros](https://stackoverflow.com/q/5812493/903061). – Gregor Thomas Aug 26 '20 at 18:54
  • Also note one of the comments on that question: *"When you say you want to "add zeros", you presumably don't want to convert your integer columns to string/categorical in order to add the zero-padding inside the data itself, you want to keep them integer and only print leading zeros when rendering output."* I wouldn't make that assumption, but that's the choice you need to make. – Gregor Thomas Aug 26 '20 at 18:57

0 Answers0