#Code to reproduce https://mbq.me/blog/augh-roc ## AUROC calculation #AUROC calculation code auroc<-function(score,cls){ n1<-sum(!cls); sum(cls)->n2 U<-sum(rank(score)[!cls])-n1*(n1+1)/2 return(1-U/n1/n2) } #... as a cryptic one-liner auroc1l<-function(score,cls) mean(rank(score)[cls]-1:sum(cls))/sum(!cls) #... caTools-like version colAuroc<-function(score,cls) colMeans(apply(score,2,rank)[cls,]-1:sum(cls))/sum(!cls) #AUROC significance pAuroc<-function(auroc,nx,ny){ W<-round((1-auroc)*nx*ny) pwilcox(W,nx,ny) } #Minimal significant AUROC (p=p), for a given nx, ny qAuroc<-function(nx,ny,p=.05){ ca<-1-(qwilcox(p,nx,ny)-1)/nx/ny #Even AUROC=1 won't be significant ca[!is.finite(ca)]<-NaN ca[ca>1]<-NaN ca } ##Plot makePlot<-function(maxN=110,maxNt=21){ library(gpclib) mergeSeries<-function(s){ if(length(s)<2) return(s) ans<-as(s[[1]],"gpc.poly");s[-1]->s for(ss in s) ans<-union(ans,as(ss,"gpc.poly")) return(ans@pts) } optimiseRects<-function(xmin,ymin,xmax,ymax,cls){ lapply(1:length(xmin),function(i) cbind( c(xmin[i],xmax[i],xmax[i],xmin[i]), c(ymin[i],ymin[i],ymax[i],ymax[i]) ))->polys lapply(split(polys,cls),mergeSeries) } do.call(rbind,lapply(c(7:maxN),function(N){ M<-1:floor(N/2) qAuroc(N-M,M)->ca data.frame( N=N, fra=M/N, fram=(M-1/2)/N, frap=(M+1/2)/N, ca=ca)[is.finite(ca),] }))->QQ QQ$aurocClass<-cut(QQ$ca,c(.5,.55,.6,.65,.7,.75,.8,.85,.9,.95,1,Inf),right=FALSE) QQ$aurocClass<-factor(QQ$aurocClass) acl<-levels(QQ$aurocClass) cc<-rev( c("#000000","#00ccff","#00d4aa","#80ff80","#ffcc00","#ff7f2a","#ff2a2a", "#d40000","#5500d4","#b380ff","#ff00cc")) plot(.2,1,type='n', xlim=c(0,.55),ylim=c(7,maxN), log="y", xlab="Fraction of the smaller class", ylab="Number of objects", main="Minimal AUROC significant at p=.05") abline(v=(1:10)/20) abline(h=7:maxNt) optimiseRects(QQ$fram,QQ$N-.5,QQ$frap,QQ$N+.5,QQ$aurocClass)->polys lapply(names(polys),function(pn) lapply(polys[[pn]],polygon,col=cc[which(acl==pn)])) QQ[QQ$N<=maxNt,]->QQ legend("bottomleft",fill=cc,acl,bg="white") }