1

I am trying to pivot my dataframe from tidy format to wide format, using a column with two values, using the following:

bai_wide = bai_trim %>% pivot_wider(names_from = Species, values_from = BAI)

But when I do this, NAs are produced in the resulting dataframe. The values should match, and when I inspect the original dataframe I can't find any instances where they do not.

I'm aware of this question being asked here , but it doesn't seem to address my issue

Output of dput(head(bai_trim, 100)) :

structure(list(Site = c("TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2"), Year = c(1930L, 1931L, 1932L, 
1933L, 1934L, 1935L, 1936L, 1937L, 1938L, 1939L, 1940L, 1941L, 
1942L, 1943L, 1944L, 1945L, 1946L, 1947L, 1948L, 1949L, 1950L, 
1951L, 1952L, 1953L, 1954L, 1955L, 1956L, 1957L, 1958L, 1959L, 
1960L, 1961L, 1962L, 1963L, 1964L, 1965L, 1966L, 1967L, 1968L, 
1969L, 1970L, 1971L, 1972L, 1973L, 1974L, 1975L, 1976L, 1977L, 
1978L, 1979L, 1980L, 1981L, 1982L, 1930L, 1931L, 1932L, 1933L, 
1934L, 1935L, 1936L, 1937L, 1938L, 1939L, 1940L, 1941L, 1942L, 
1943L, 1944L, 1945L, 1946L, 1947L, 1948L, 1949L, 1950L, 1951L, 
1952L, 1953L, 1954L, 1955L, 1956L, 1957L, 1958L, 1959L, 1960L, 
1961L, 1962L, 1963L, 1964L, 1965L, 1966L, 1967L, 1968L, 1969L, 
1970L, 1971L, 1972L, 1973L, 1974L, 1975L, 1976L), Species = c("QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR"
), Sample.Depth = c(30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 19L, 29L, 29L, 29L, 30L, 31L, 31L, 31L, 31L, 
31L, 31L, 31L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 
32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 
32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L
), Method = c("DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH"), BAI = c(1329.82725527258, 1583.55443950606, 
1562.33088797649, 1781.17227674256, 2239.26579940025, 2283.51207558404, 
1494.47266835451, 2079.21430793831, 2431.61659002079, 2063.6712031744, 
2279.01645480338, 2240.79817505811, 2537.08695503732, 2357.25996541304, 
2143.34894709899, 2963.42239899576, 3266.11822944487, 3188.08984551795, 
2053.72520969305, 1976.4907044215, 1974.33378516752, 2314.19980193622, 
1986.85089493789, 1717.7066077125, 1712.32576613411, 2270.12697244457, 
2265.44617869404, 2086.27614664055, 2290.16557632423, 2067.56268776649, 
2330.32100341616, 2594.45495623365, 1916.37409435704, 2615.32977322989, 
2994.09297309259, 3105.71799117356, 2608.13289994918, 2781.32817927508, 
2788.89468459625, 2814.41629406914, 2218.40404749475, 2375.62820321149, 
2454.40055519329, 2536.22462576871, 2673.39980127834, 2883.60697407212, 
2901.26428554182, 2759.19544971662, 3271.437201359, 3023.01356721046, 
2586.11651777101, 2683.77375275508, 2560.55282710926, 1028.27393956856, 
1254.97727247239, 1180.00666939284, 1162.75652641982, 1468.21393690705, 
1420.29545487908, 870.636254692378, 1558.97134681397, 1680.04973736316, 
1807.98548193521, 1887.32063639148, 1916.04119222857, 1949.52683704445, 
1921.80868471893, 1600.62264826328, 1859.9149833578, 2184.22704501268, 
2364.39029270987, 1853.12296621112, 1533.22199599478, 1797.1627135163, 
1738.07965789397, 1687.15007187521, 1592.13731685411, 1656.32266290939, 
2337.09276793395, 2353.86414716497, 2290.38356871338, 2562.25811266612, 
2576.09112815194, 2595.90714922909, 2892.38644610441, 1926.95398513788, 
2040.79373628591, 2636.83713546072, 3216.10408623204, 2399.34264253439, 
2411.58302876301, 2150.87125164971, 2456.28295814168, 2401.15926385922, 
2525.4045600946, 2619.28151832898, 2869.37020856327, 2457.47946097768, 
2505.49431848312, 2343.63069935373)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -100L), groups = structure(list(
    Site = c("TN_C1", "TN_C2"), .rows = structure(list(1:53, 
        54:100), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L), .drop = TRUE))

Here is a sample of the resulting dataframe:

structure(list(Site = c("TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", 
"TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C1", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2"), Year = c(1930L, 1931L, 1932L, 
1933L, 1934L, 1935L, 1936L, 1937L, 1938L, 1939L, 1940L, 1941L, 
1942L, 1943L, 1944L, 1945L, 1946L, 1947L, 1948L, 1949L, 1950L, 
1951L, 1952L, 1953L, 1954L, 1955L, 1956L, 1957L, 1958L, 1959L, 
1960L, 1961L, 1962L, 1963L, 1964L, 1965L, 1966L, 1967L, 1968L, 
1969L, 1970L, 1971L, 1972L, 1973L, 1974L, 1975L, 1976L, 1977L, 
1978L, 1979L, 1980L, 1981L, 1982L, 1930L, 1931L, 1932L, 1933L, 
1934L, 1935L, 1936L, 1937L, 1938L, 1939L, 1940L, 1941L, 1942L, 
1943L, 1944L, 1945L, 1946L, 1947L, 1948L, 1949L, 1950L, 1951L, 
1952L, 1953L, 1954L, 1955L, 1956L, 1957L, 1958L, 1959L, 1960L, 
1961L, 1962L, 1963L, 1964L, 1965L, 1966L, 1967L, 1968L, 1969L, 
1970L, 1971L, 1972L, 1973L, 1974L, 1975L, 1976L), Sample.Depth = c(30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 29L, 26L, 
29L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L), Method = c("DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH"), QA = c(1112.01006767124, 
949.583411961448, 982.998663254952, 1087.36586010667, 1183.95219489817, 
1437.32692024663, 859.941821023156, 1378.82104138941, 1715.11803906048, 
1205.20873806085, 1314.40911284639, 1311.60513756934, 1334.12431932916, 
1250.4241475598, 1206.45532955227, 1669.46160189739, 1748.00363523953, 
1522.13188782807, 1068.48352520193, 1383.60591823409, 1629.94356878758, 
1716.81958142787, 1371.09743912618, 1177.56768513191, 1268.53730445105, 
1435.3852922059, 1431.72686167387, 1209.19417164828, 1475.2233795444, 
1353.14184703705, 1405.88977051333, 1502.28919572968, 1231.9358745554, 
1493.22404186533, 1608.26405912164, 1758.40007776153, 1358.26743655462, 
1604.19889400061, 1582.77287404955, 1460.38775673841, 1718.89866003169, 
1926.87492109503, 2035.25743659833, 2154.8572833228, 2155.72079265846, 
1938.13092846124, 2236.80568615272, 1805.23678218424, 1856.36065999217, 
1679.99679942377, 1441.26238614602, 1936.44942937414, 2133.45057631534, 
1085.3150108096, 974.860473716478, 986.924868102327, 1037.83237831603, 
1312.30667301435, 1405.30585427792, 773.207242839713, 1277.61195650029, 
1772.52157942987, 1388.91468492248, 1391.06708726821, 1268.28478285902, 
1295.29415128352, 1143.11153240523, 1058.28457720443, 1476.33487734882, 
1673.26309468627, 1455.5231756649, 1015.5006665268, 1242.52404078483, 
1348.94246837961, 1301.55518283897, 1075.92047580797, 977.592546236365, 
1046.42643732053, 1426.81431935015, 1475.07415572278, 1455.23907789844, 
1649.60781234728, 1563.4820765323, 1642.9919422491, 1865.42560165599, 
1329.73932888637, 1795.37507081007, 2413.71424418505, 2499.48425942841, 
2007.68534251994, 2279.94325095388, 2250.84540282916, 1988.31215010309, 
2384.77641721496, 2719.39349513496, 2888.75729672066, 2955.42338126383, 
2908.70715866689, 2724.37859079958, 2901.46999203769), QR = c(1329.82725527258, 
1583.55443950606, 1562.33088797649, 1781.17227674256, 2239.26579940025, 
2283.51207558404, 1494.47266835451, 2079.21430793831, 2431.61659002079, 
2063.6712031744, 2279.01645480338, 2240.79817505811, 2537.08695503732, 
2357.25996541304, 2143.34894709899, 2963.42239899576, 3266.11822944487, 
3188.08984551795, 2053.72520969305, 1976.4907044215, 1974.33378516752, 
2314.19980193622, 1986.85089493789, 1717.7066077125, 1712.32576613411, 
2270.12697244457, 2265.44617869404, 2086.27614664055, 2290.16557632423, 
2067.56268776649, 2330.32100341616, 2594.45495623365, 1916.37409435704, 
2615.32977322989, 2994.09297309259, 3105.71799117356, 2608.13289994918, 
2781.32817927508, 2788.89468459625, 2814.41629406914, 2218.40404749475, 
2375.62820321149, 2454.40055519329, 2536.22462576871, 2673.39980127834, 
2883.60697407212, 2901.26428554182, 2759.19544971662, 3271.437201359, 
3023.01356721046, 2586.11651777101, NA, NA, 1028.27393956856, 
NA, NA, 1162.75652641982, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -100L), groups = structure(list(Site = c("TN_C1", 
"TN_C2"), .rows = structure(list(1:53, 54:100), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L), .drop = TRUE))

Summary output of summary(bai_trim):

Site                Year        Species           Sample.Depth       Method               BAI        
 Length:6102        Min.   :1793   Length:6102        Min.   :  5.00   Length:6102        Min.   :-410.3  
 Class :character   1st Qu.:1918   Class :character   1st Qu.: 15.00   Class :character   1st Qu.:1383.0  
 Mode  :character   Median :1945   Mode  :character   Median : 28.00   Mode  :character   Median :2031.3  
                    Mean   :1938                      Mean   : 25.26                      Mean   :2302.9  
                    3rd Qu.:1967                      3rd Qu.: 30.00                      3rd Qu.:2891.8  
                    Max.   :2014                      Max.   :105.00                      Max.   :8924.4 

Output of sum(is.na(bai_trim):

sum(is.na(bai_trim))
[1] 0

Output of dput(new_df) on new_df = bai_trim %>% filter(Year > 1929, Year < 1977, Site == 'TN_C2')

 structure(list(Site = c("TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2", 
"TN_C2", "TN_C2", "TN_C2", "TN_C2", "TN_C2"), Year = c(1930L, 
1931L, 1932L, 1933L, 1934L, 1935L, 1936L, 1937L, 1938L, 1939L, 
1940L, 1941L, 1942L, 1943L, 1944L, 1945L, 1946L, 1947L, 1948L, 
1949L, 1950L, 1951L, 1952L, 1953L, 1954L, 1955L, 1956L, 1957L, 
1958L, 1959L, 1960L, 1961L, 1962L, 1963L, 1964L, 1965L, 1966L, 
1967L, 1968L, 1969L, 1970L, 1971L, 1972L, 1973L, 1974L, 1975L, 
1976L, 1930L, 1931L, 1932L, 1933L, 1934L, 1935L, 1936L, 1937L, 
1938L, 1939L, 1940L, 1941L, 1942L, 1943L, 1944L, 1945L, 1946L, 
1947L, 1948L, 1949L, 1950L, 1951L, 1952L, 1953L, 1954L, 1955L, 
1956L, 1957L, 1958L, 1959L, 1960L, 1961L, 1962L, 1963L, 1964L, 
1965L, 1966L, 1967L, 1968L, 1969L, 1970L, 1971L, 1972L, 1973L, 
1974L, 1975L, 1976L), Species = c("QA", "QA", "QA", "QA", "QA", 
"QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", 
"QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", 
"QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", 
"QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", "QA", "QR", "QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", 
"QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", "QR", 
"QR"), Sample.Depth = c(29L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
29L, 29L, 29L, 30L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 32L, 32L, 
32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 
32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L, 
32L, 32L, 32L, 32L, 32L, 32L, 32L, 32L), Method = c("DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", "DBH", 
"DBH", "DBH"), BAI = c(1085.3150108096, 974.860473716478, 986.924868102327, 
1037.83237831603, 1312.30667301435, 1405.30585427792, 773.207242839713, 
1277.61195650029, 1772.52157942987, 1388.91468492248, 1391.06708726821, 
1268.28478285902, 1295.29415128352, 1143.11153240523, 1058.28457720443, 
1476.33487734882, 1673.26309468627, 1455.5231756649, 1015.5006665268, 
1242.52404078483, 1348.94246837961, 1301.55518283897, 1075.92047580797, 
977.592546236365, 1046.42643732053, 1426.81431935015, 1475.07415572278, 
1455.23907789844, 1649.60781234728, 1563.4820765323, 1642.9919422491, 
1865.42560165599, 1329.73932888637, 1795.37507081007, 2413.71424418505, 
2499.48425942841, 2007.68534251994, 2279.94325095388, 2250.84540282916, 
1988.31215010309, 2384.77641721496, 2719.39349513496, 2888.75729672066, 
2955.42338126383, 2908.70715866689, 2724.37859079958, 2901.46999203769, 
1028.27393956856, 1254.97727247239, 1180.00666939284, 1162.75652641982, 
1468.21393690705, 1420.29545487908, 870.636254692378, 1558.97134681397, 
1680.04973736316, 1807.98548193521, 1887.32063639148, 1916.04119222857, 
1949.52683704445, 1921.80868471893, 1600.62264826328, 1859.9149833578, 
2184.22704501268, 2364.39029270987, 1853.12296621112, 1533.22199599478, 
1797.1627135163, 1738.07965789397, 1687.15007187521, 1592.13731685411, 
1656.32266290939, 2337.09276793395, 2353.86414716497, 2290.38356871338, 
2562.25811266612, 2576.09112815194, 2595.90714922909, 2892.38644610441, 
1926.95398513788, 2040.79373628591, 2636.83713546072, 3216.10408623204, 
2399.34264253439, 2411.58302876301, 2150.87125164971, 2456.28295814168, 
2401.15926385922, 2525.4045600946, 2619.28151832898, 2869.37020856327, 
2457.47946097768, 2505.49431848312, 2343.63069935373)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -94L), groups = structure(list(
    Site = "TN_C2", .rows = structure(list(1:94), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -1L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE))
brlockwood
  • 135
  • 7
  • 1
    It is unlikely to obtain the resulting data frame from the data frame that you have posted as the main data frame. Because in the first data frame, the `Species` column got only one value "QR", but the resulting data frame, got a column "QA" which I suppose comes from the code you ran. BTW, I saved the first data frame as `d`, and ran the code: `d %>% pivot_wider(names_from = Species, values_from = BAI)`, no NAs was generated. – Hamid G Dec 05 '21 at 04:02
  • Right. The resulting dataframe is from running the code on the full original dataframe. I only posted a sample of that dataframe due to size. – brlockwood Dec 05 '21 at 04:05
  • Got it, It seems there is nothing wrong with your code, so inspect your data frame to see the mode/type/class of your variables. This might help. Also if you got any warning printed on your console, share that. It might have info on why you are getting NAs. – Hamid G Dec 05 '21 at 04:11
  • There are no warnings printed to the console when I run the code. Could you be more specific as to what modes/types/classes of variables could produce the NAs? – brlockwood Dec 05 '21 at 04:12
  • I am not sure about your data set structure, but as an example, if you have a column with numbers saved as a character vector, then you or a function that treats the given vector `as.numeric ()`, those cells with numbers will be OK, however, empty (`""`) cells or those with `"NA"`, will generate `NA`. – Hamid G Dec 05 '21 at 04:49
  • I've added the summary outputs and result of `sum(is.na())` if that helps narrow it down. I'm stumped so any more help is appreciated – brlockwood Dec 05 '21 at 14:05
  • Can you provide the `dput()` of this subset of your df?: `new_df = bai_trim %>% filter(Year > 1980, Year < 1977)`. I wonder if you don't have QR species in the years which you can see NAs – RobertoT Dec 05 '21 at 14:16
  • I'm pretty sure that's going to produce an empty dataframe, since a year can't be both greater than 1980 and less than 1977. Did you mean between 1977-1980? – brlockwood Dec 05 '21 at 14:19
  • Oh, sorry. I didn't see there were two sites. Let's see only for site 2. Try `new_df = bai_trim %>% filter(Year > 1929, Year < 1977, Site == 'TN_C2')` pls to see this subset. PS: SO only notify the person you are replying to if you write @{name}. Like @RobertoT – RobertoT Dec 05 '21 at 15:12
  • Unfortunately that's too much data to post. But I did post for the 1977-1980 portion, which does contain a portion of that data for which NAs are being produced in the resulting df, and, as you can see, there is data for QR for this portion. So I'm not sure what's going on – brlockwood Dec 05 '21 at 15:15
  • I wanted to compare with your output which only has TN_C1 and TN_C2 sites. Specially looking for NAs values for TN_C2 site . But you posted a subset which lot more sites, years from 1977 to 1980 which I can't use (last year for the output you posted for TN_C2 site is 1976). I can't try what I'm thinking with 1977-1980 subset. – RobertoT Dec 05 '21 at 15:38
  • Please, try to provide this subset: `new_df = bai_trim %>% filter(Year > 1929, Year < 1977, Site == 'TN_C2')` . This should create only a df of 92 rows, instead of 140 rows as the one for 1977-1980 with all sites. PS: If you don't tag me with @ the web don't notify me when you reply me and I can't read the reply. – RobertoT Dec 05 '21 at 15:40
  • @RobertoT Ah, my mistake. I didn't see the site filter added to your code. I've updated my post with that output – brlockwood Dec 05 '21 at 15:43

1 Answers1

1

I used your dataframe filtered between 1930-1976 for Site: 'TN_C2' to pivot wider:

bai_wide = df %>% 
  pivot_wider(names_from = Species, values_from = BAI)

And this is the output:

> bai_wide
# A tibble: 92 x 6
# Groups:   Site [1]
   Site   Year Sample.Depth Method    QA    QR
   <chr> <int>        <int> <chr>  <dbl> <dbl>
 1 TN_C2  1930           29 DBH    1085. 1028.
 2 TN_C2  1931           30 DBH     975.   NA 
 3 TN_C2  1931           29 DBH      NA  1255.
 4 TN_C2  1932           30 DBH     987.   NA 
 5 TN_C2  1932           29 DBH      NA  1180.
 6 TN_C2  1933           30 DBH    1038. 1163.
 7 TN_C2  1934           30 DBH    1312.   NA 
 8 TN_C2  1934           31 DBH      NA  1468.
 9 TN_C2  1935           30 DBH    1405.   NA 
10 TN_C2  1935           31 DBH      NA  1420.
# ... with 82 more rows

We can see there is a lot of Nas. Why? Because you have multiple values in Sample.Depth. So pivoting wider only assign the values for each Sample.Depth, therefore you will have empty values in your columns.

Possible solutions depend on how precise do you want to be using the Depth parameter. That depends on the analysis you want to do.

  1. If you dont care about the depth of the sample, you can just create a new df without that column and then pivoting.

  2. If you just care but not really precise, you can combine the values of Depth for each year grouping by Site and Year and mutating a new column to use the mean of Depth. Like >%> group_by(Site,Year) >%> mutate(meanDepth = mean())

  3. Just keep the NAs values because this is the way your dataframe it is.

  4. There are more difficult ways to try to create relations to only have 1 row per year per site altering Sample.Depth but I don't have enough time to elaborate.

This just depend on you. Main thing you have to know it is that Sample.Depth is creating you the empty values because pivot_wider() create the new columns related to the other column values which were in the same row.

RobertoT
  • 1,663
  • 3
  • 12