0

I have this data, seen below:

##   X    ID        DOB sector meters Oct  Res_FROM    Res_TO  Exp_FROM     Exp_TO
## 1 1 20100  8/24/1979    H38   6400   W 8/15/1979 5/15/1991 8/24/1979 12/31/1988
## 2 2 20101   5/5/1980    B01   1600  NW 5/15/1980 4/15/1991 5/15/1980 12/31/1988
## 3 3 20102  3/17/1979    H04   1600  SW 6/15/1972 8/15/1979 3/17/1979  8/15/1979
## 4 4 20103 11/30/1981    B09   3200  NE 1/15/1982 1/15/1984 1/15/1982  1/15/1984
## 5 5 20103 11/30/1981    B37   8000   N 1/15/1984 4/15/1986 1/15/1984  4/15/1986
## 6 6 20104   9/1/1978    B09   3200  NE 1/15/1982 1/15/1984 1/15/1982  1/15/1984
##   Exps_Grp Yr1952 Yr1953 Yr1954 Yr1955 Yr1956 Yr1957 Yr1958 Yr1959 Yr1960
## 1       NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 2       NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 3       NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 4       NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 5       NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 6       NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
##   Yr1961 Yr1962 Yr1963 Yr1964 Yr1965 Yr1966 Yr1967 Yr1968 Yr1969 Yr1970 Yr1971
## 1     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 2     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 3     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 4     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 5     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
## 6     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA     NA
##   Yr1972 Yr1973 Yr1974 Yr1975 Yr1976 Yr1977 Yr1978   Yr1979    Yr1980    Yr1981
## 1     NA     NA     NA     NA     NA     NA     NA 1.082616 0.7834218 0.7834218
## 2     NA     NA     NA     NA     NA     NA     NA       NA 0.6825884 1.0937646
## 3     NA     NA     NA     NA     NA     NA     NA 4.673775        NA        NA
## 4     NA     NA     NA     NA     NA     NA     NA       NA        NA        NA
## 5     NA     NA     NA     NA     NA     NA     NA       NA        NA        NA
## 6     NA     NA     NA     NA     NA     NA     NA       NA        NA        NA
##      Yr1982    Yr1983    Yr1984    Yr1985    Yr1986    Yr1987    Yr1988
## 1 0.7834218 0.7834218 0.7834218 0.7834218 0.7834218 0.7834218 0.1956091
## 2 1.0937646 1.0937646 1.0937646 1.0937646 1.0937646 1.0937646 0.2730972
## 3        NA        NA        NA        NA        NA        NA        NA
## 4 2.7934596 2.8975827 0.1041230        NA        NA        NA        NA
## 5        NA        NA 0.5662659 0.5890579 0.1416258        NA        NA
## 6 2.7934596 2.8975827 0.1041230        NA        NA        NA        NA
##     Yrs_Exp arth_mean    median cumulative age residence
## 1 9.3616438 0.7545599 0.7834218   7.545599   9       112
## 2 8.6356164 0.9568931 1.0937646   8.612038   8       103
## 3 0.4136986 4.6737751 4.6737751   4.673775   0         4
## 4 2.0000000 1.9317218 2.7934596   5.795165   2        24
## 5 2.2493151 0.4323165 0.5662659   1.296950   4        27
## 6 2.0000000 1.9317218 2.7934596   5.795165   5        24

There are separate sectors, with some repeated, as multiple IDs may be located in said sectors. The residence column is the residence time in months that a person lived in the sector. My actual data is 14,000+ rows. I would like to add together all of the values in the residence columns for each sector (100 total in my actual data) and then take the average value to produce an average residence value for each of the 100 sectors (thus 100 residence values). I'm not entirely sure how to accomplish this and was seeking some assistance. Reproducible data below:

 structure(list(X = 1:10, ID = c(20100L, 20101L, 20102L, 20103L, 
 20103L, 20104L, 20104L, 20105L, 20105L, 20106L), DOB = c("8/24/1979", 
 "5/5/1980", "3/17/1979", "11/30/1981", "11/30/1981", "9/1/1978", 
 "9/1/1978", "12/3/1980", "12/3/1980", "4/25/1978"), sector = c("H38", 
 "B01", "H04", "B09", "B37", "B09", "B37", "B09", "B09", "B09"
 ), meters = c(6400L, 1600L, 1600L, 3200L, 8000L, 3200L, 8000L, 
 3200L, 3200L, 3200L), Oct = c("W", "NW", "SW", "NE", "N", "NE", 
 "N", "NE", "NE", "NE"), Res_FROM = c("8/15/1979", "5/15/1980", 
 "6/15/1972", "1/15/1982", "1/15/1984", "1/15/1982", "1/15/1984", 
 "12/15/1980", "8/15/1983", "4/15/1978"), Res_TO = c("5/15/1991", 
 "4/15/1991", "8/15/1979", "1/15/1984", "4/15/1986", "1/15/1984", 
 "4/15/1986", "8/15/1983", "3/15/1991", "8/15/1983"), Exp_FROM = c("8/24/1979", 
 "5/15/1980", "3/17/1979", "1/15/1982", "1/15/1984", "1/15/1982", 
 "1/15/1984", "12/15/1980", "8/15/1983", "4/25/1978"), Exp_TO = c("12/31/1988", 
 "12/31/1988", "8/15/1979", "1/15/1984", "4/15/1986", "1/15/1984", 
 "4/15/1986", "8/15/1983", "12/31/1988", "8/15/1983"), Exps_Grp = c(NA, 
 NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1952 = c(NA, NA, NA, NA, 
 NA, NA, NA, NA, NA, NA), Yr1953 = c(NA, NA, NA, NA, NA, NA, NA, 
 NA, NA, NA), Yr1954 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
 ), Yr1955 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1956 = c(NA, 
 NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1957 = c(NA, NA, NA, NA, 
 NA, NA, NA, NA, NA, NA), Yr1958 = c(NA, NA, NA, NA, NA, NA, NA, 
 NA, NA, NA), Yr1959 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
 ), Yr1960 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1961 = c(NA, 
 NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1962 = c(NA, NA, NA, NA, 
 NA, NA, NA, NA, NA, NA), Yr1963 = c(NA, NA, NA, NA, NA, NA, NA, 
 NA, NA, NA), Yr1964 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
 ), Yr1965 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1966 = c(NA, 
 NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1967 = c(NA, NA, NA, NA, 
 NA, NA, NA, NA, NA, NA), Yr1968 = c(NA, NA, NA, NA, NA, NA, NA, 
 NA, NA, NA), Yr1969 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
 ), Yr1970 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1971 = c(NA, 
 NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1972 = c(NA, NA, NA, NA, 
 NA, NA, NA, NA, NA, NA), Yr1973 = c(NA, NA, NA, NA, NA, NA, NA, 
 NA, NA, NA), Yr1974 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
 ), Yr1975 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1976 = c(NA, 
 NA, NA, NA, NA, NA, NA, NA, NA, NA), Yr1977 = c(NA, NA, NA, NA, 
 NA, NA, NA, NA, NA, NA), Yr1978 = c(NA, NA, NA, NA, NA, NA, NA, 
 NA, NA, 14.76869627), Yr1979 = c(1.082616155, NA, 4.673775148, 
 NA, NA, NA, NA, NA, NA, 10.8434728), Yr1980 = c(0.783421772, 
 0.682588398, NA, NA, NA, NA, NA, 0.120085751, NA, 2.897582683
 ), Yr1981 = c(0.783421772, 1.093764595, NA, NA, NA, NA, NA, 2.897582683, 
 NA, 2.897582683), Yr1982 = c(0.783421772, 1.093764595, NA, 2.793459642, 
 NA, 2.793459642, NA, 2.897582683, NA, 2.897582683), Yr1983 = c(0.783421772, 
 1.093764595, NA, 2.897582683, NA, 2.897582683, NA, 1.805844233, 
 1.09173845, 1.805844233), Yr1984 = c(0.783421772, 1.093764595, 
 NA, 0.104123041, 0.566265934, 0.104123041, 0.566265934, NA, 2.897582683, 
 NA), Yr1985 = c(0.783421772, 1.093764595, NA, NA, 0.589057923, 
 NA, 0.589057923, NA, 2.897582683, NA), Yr1986 = c(0.783421772, 
 1.093764595, NA, NA, 0.141625765, NA, 0.141625765, NA, 2.897582683, 
 NA), Yr1987 = c(0.783421772, 1.093764595, NA, NA, NA, NA, NA, 
 NA, 2.897582683, NA), Yr1988 = c(0.1956091, 0.27309722, NA, NA, 
 NA, NA, NA, NA, 0.723484539, NA), Yrs_Exp = c(9.361643836, 8.635616438, 
 0.41369863, 2, 2.249315068, 2, 2.249315068, 2.665753425, 5.383561644, 
 5.309589041), arth_mean = c(0.754559943, 0.956893087, 4.673775148, 
 1.931721789, 0.432316541, 1.931721789, 0.432316541, 1.930273838, 
 2.234258954, 6.018460225), median = c(0.783421772, 1.093764595, 
 4.673775148, 2.793459642, 0.566265934, 2.793459642, 0.566265934, 
 2.351713458, 2.897582683, 2.897582683), cumulative = c(7.545599433, 
 8.612037782, 4.673775148, 5.795165366, 1.296949622, 5.795165366, 
 1.296949622, 7.72109535, 13.40555372, 36.11076135), age = c(9L, 
 8L, 0L, 2L, 4L, 5L, 7L, 2L, 8L, 5L), residence = c(112L, 103L, 
 4L, 24L, 27L, 24L, 27L, 32L, 64L, 63L)), class = "data.frame", row.names = c(NA, 
 -10L))
barnsm2
  • 185
  • 7
  • 1
    which are the columns to aggregate. Do you want `df1 %>% group_by(sector) %>% summarise(residence = mean(residence))` – akrun Jul 10 '22 at 21:14
  • Or maybe `df1 %>% group_by(sector) %>% summarise(residence = mean(rowSums(across(matches("^Yr\\d{4}$")), na.rm = TRUE)), .groups = 'drop')` – akrun Jul 10 '22 at 21:17

0 Answers0