Alexey Masyutin

11
reputation
3

conf.int.boot<-function(niter=1000,score=d[,score], pd=d[,pd], target=d[,def], br=hist(score,plot=FALSE)$breaks,alpha=0.05){ score.binned<-bin(score,br = br)

cnts<-tapply(pd,score.binned,length) mean.pds<-tapply(pd,score.binned,mean) sd.pds<-tapply(pd,score.binned,function(p){ sqrt(mean(p*(1-p),na.rm=TRUE)/sum(!is.na(p))) })

drs<-tapply(target,score.binned,function(d){sum(d,na.rm=TRUE)/sum(!is.na(d))})

tab<-as.data.frame(matrix(NA,nrow=0,ncol=length(unique(score.binned)))) names(tab)<-unique(score.binned) for(i in 1:niter){ smpl<-sample(1:length(score),replace = TRUE) for(gr in unique(score.binned[smpl])){ tab[nrow(tab)+1,as.character(gr)]<-mean(pd[smpl][score.binned[smpl]==gr]) } }

low.bounds<-unlist(lapply(names(tab),function(s)quantile(tab[[s]],probs = alpha,na.rm = TRUE))) up.bounds<-unlist(lapply(names(tab),function(s)quantile(tab[[s]],probs = 1-alpha,na.rm = TRUE)))

res<-data.frame(mean.PD=mean.pds,low.ci=low.bounds,high.ci=up.bounds,DR=drs,breaks=unique(score.binned),count=cnts) row.names(res)<-NULL res[order(res$breaks),] }

gini.conf<-function(alpha=0.05,score=d[,score],target=d[,def], niter=10000,hist=FALSE,dir=graph.dir){ tab<-numeric(0) #l<-list() i<-1 while(i <=niter){ smpl<-sample(1:length(score),size=length(score),replace = TRUE)
#.GlobalEnv$l[length(l)+1]<-list(smpl) if(any(target[smpl]==1)){ tab<-c(tab,2*roc(response = target[smpl],predictor=score[smpl])$auc -1) i<-i+1 } } if(hist){ png(paste0(dir,"Gini_CI_boot.png"),width = 400,height = 400,units = "px") hist(tab,main = paste("Gini distribution: niter=",niter),xlim=c(-1,1)) dev.off() }

quantile(tab,probs =c(alpha,1-alpha))

}

gini.conf(score = x,target = s,hist = TRUE,niter=100) conf.int(score = sc,pd = x,target = s,br = c(800,900,1000,1100,1200)) conf.int.boot(score = sc,pd = x,target = s,br = c(800,900,1000,1100,1200))