conf.int.boot<-function(niter=1000,score=d[,score], pd=d[,pd], target=d[,def], br=hist(score,plot=FALSE)$breaks,alpha=0.05){ score.binned<-bin(score,br = br)
cnts<-tapply(pd,score.binned,length) mean.pds<-tapply(pd,score.binned,mean) sd.pds<-tapply(pd,score.binned,function(p){ sqrt(mean(p*(1-p),na.rm=TRUE)/sum(!is.na(p))) })
drs<-tapply(target,score.binned,function(d){sum(d,na.rm=TRUE)/sum(!is.na(d))})
tab<-as.data.frame(matrix(NA,nrow=0,ncol=length(unique(score.binned)))) names(tab)<-unique(score.binned) for(i in 1:niter){ smpl<-sample(1:length(score),replace = TRUE) for(gr in unique(score.binned[smpl])){ tab[nrow(tab)+1,as.character(gr)]<-mean(pd[smpl][score.binned[smpl]==gr]) } }
low.bounds<-unlist(lapply(names(tab),function(s)quantile(tab[[s]],probs = alpha,na.rm = TRUE))) up.bounds<-unlist(lapply(names(tab),function(s)quantile(tab[[s]],probs = 1-alpha,na.rm = TRUE)))
res<-data.frame(mean.PD=mean.pds,low.ci=low.bounds,high.ci=up.bounds,DR=drs,breaks=unique(score.binned),count=cnts) row.names(res)<-NULL res[order(res$breaks),] }
gini.conf<-function(alpha=0.05,score=d[,score],target=d[,def], niter=10000,hist=FALSE,dir=graph.dir){
tab<-numeric(0)
#l<-list()
i<-1
while(i <=niter){
smpl<-sample(1:length(score),size=length(score),replace = TRUE)
#.GlobalEnv$l[length(l)+1]<-list(smpl)
if(any(target[smpl]==1)){
tab<-c(tab,2*roc(response = target[smpl],predictor=score[smpl])$auc -1)
i<-i+1
}
}
if(hist){
png(paste0(dir,"Gini_CI_boot.png"),width = 400,height = 400,units = "px")
hist(tab,main = paste("Gini distribution: niter=",niter),xlim=c(-1,1))
dev.off()
}
quantile(tab,probs =c(alpha,1-alpha))
}
gini.conf(score = x,target = s,hist = TRUE,niter=100) conf.int(score = sc,pd = x,target = s,br = c(800,900,1000,1100,1200)) conf.int.boot(score = sc,pd = x,target = s,br = c(800,900,1000,1100,1200))