Consider the following data:
> df
nt_1 NB1949j NB2021j NB3366j NB1949f NB2021f NB3366f nt_2
1 1 1 1 1 1 0 1 1
2 0 0 1 0 1 1 0 0
3 1 1 0 0 0 0 0 1
4 0 0 1 1 1 1 1 0
5 0 0 1 1 1 1 0 0
6 1 1 1 0 1 0 0 1
7 1 1 0 0 0 1 0 1
8 0 0 1 0 0 0 1 0
9 0 0 0 0 0 1 0 0
10 -1 1 1 1 1 1 1 -1
How can I calculate the following variables:
NB1949jf=NB1949j+NB1949f
NB2021jf=NB2021j+NB2021f
NB3366jf=NB3366j+NB3366f
I have a large number of variables that start with "NB" and end with "j" or "f". Each variable name is "NB" + a unique code (like 1949) + "j" or "f". I want to sum the columns that share the same unique code (like 1949). (This requires extracting the unique codes from the data frame's column names.)
library(dplyr)
# Example data: two unrelated columns (nt_1, nt_2) plus one "j" and one "f"
# column for each of the codes 1949, 2021 and 3366.
df <- data.frame(
  nt_1 = c(1, 0, 1, 0, 0, 1, 1, 0, 0, -1),
  NB1949j = c(1, 0, 1, 0, 0, 1, 1, 0, 0, 1),
  NB2021j = c(1, 1, 0, 1, 1, 1, 0, 1, 0, 1),
  NB3366j = c(1, 0, 0, 1, 1, 0, 0, 0, 0, 1),
  NB1949f = c(1, 1, 0, 1, 1, 1, 0, 0, 0, 1),
  NB2021f = c(0, 1, 0, 1, 1, 0, 1, 0, 1, 1),
  NB3366f = c(1, 0, 0, 1, 0, 0, 0, 1, 0, 1),
  nt_2 = c(1, 0, 1, 0, 0, 1, 1, 0, 0, -1)
)

# Desired results, written out by hand for the three codes. The columns live
# inside df, so they are evaluated with with(); the sums are kept as
# standalone vectors and df itself is left unchanged.
NB1949jf <- with(df, NB1949j + NB1949f)
NB2021jf <- with(df, NB2021j + NB2021f)
NB3366jf <- with(df, NB3366j + NB3366f)
I think the first step is to select the columns:
# Keep only the columns whose names start with "NB" AND end in "j" or "f".
# Comma-separated selections in select() are unioned, so the prefix and the
# suffix conditions are combined with `&` to require both at once.
df2 <- df %>%
  select(starts_with("NB") & (ends_with("j") | ends_with("f")))
df2
NB1949j NB2021j NB3366j NB1949f NB2021f NB3366f
1 1 1 1 1 0 1
2 0 1 0 1 1 0
3 1 0 0 0 0 0
4 0 1 1 1 1 1
5 0 1 1 1 1 0
6 1 1 0 1 0 0
7 1 0 0 0 1 0
8 0 1 0 0 0 1
9 0 0 0 0 1 0
10 1 1 1 1 1 1
Now extract the unique codes as follows:
# Take each column name of df2 from its 3rd character up to its
# second-to-last character (i.e. drop the first two and the last character).
code1 <- substr(
  x = names(df2),
  start = 3,
  stop = nchar(names(df2)) - 1
)