### Code by Eric Lecoutre, Universite catholique de Louvain, Belgium ### Winner of the R Homepage graphics competition 2004 ### Works in R 1.8.1 ... require(ade4) require(mva) require(RColorBrewer) require(pixmap) # -------------------------------------------------------------------------------------------------------------------------------- ltitle=function(x,backcolor="#e8c9c1",forecolor="darkred",cex=2,ypos=0.4){ plot(x=c(-1,1),y=c(0,1),xlim=c(0,1),ylim=c(0,1),type="n",axes=FALSE) polygon(x=c(-2,-2,2,2),y=c(-2,2,2,-2),col=backcolor,border=NA) text(x=0,y=ypos,pos=4,cex=cex,labels=x,col=forecolor) } # plotacpclust(USArrests) plotacpclust = function(data,xax=1,yax=2,hcut,cor=TRUE,clustermethod="ave",colbacktitle="#e8c9c1",wcos=3,Rpowered=FALSE,...){ # data: data.frame to analyze # xax, yax: Factors to select for graphs # Parameters for hclust # hcut # clustermethod require(ade4) pcr=princomp(data,cor=cor) datac=t((t(data)-pcr$center )/pcr$scale) hc=hclust(dist(data),method=clustermethod) if (missing(hcut)) hcut=quantile(hc$height,c(0.97)) def.par <- par(no.readonly = TRUE) on.exit(par(def.par)) mylayout=layout(matrix(c(1,2,3,4,5,1,2,3,4,6,7,7,7,8,9,7,7,7,10,11),ncol=4),widths=c(4/18,2/18,6/18,6/18),heights=c(lcm(1),3/6,1/6,lcm(1),1/3)) par(mar = c(0.1, 0.1, 0.1, 0.1)) par(oma = rep(1,4)) ltitle(paste("PCA ",dim(unclass(pcr$loadings))[2], "vars"),cex=1.6,ypos=0.7) text(x=0,y=0.2,pos=4,cex=1,labels=deparse(pcr$call),col="black") pcl=unclass(pcr$loadings) pclperc=100*(pcr$sdev)/sum(pcr$sdev) s.corcircle(pcl[,c(xax,yax)],1,2,sub=paste("(",xax,"-",yax,") ",round(sum(pclperc[c(xax,yax)]),0),"%",sep=""),possub="bottomright",csub=3,clabel=2) wsel=c(xax,yax) scatterutil.eigen(pcr$sdev,wsel=wsel,sub="") dend=hc dend$labels=rep("",length(dend$labels)) dend=as.dendrogram(dend) ngrp=length(cut(dend,hcut)$lower) ltitle(paste("Clustering ",ngrp, "groups"),cex=1.6,ypos=0.4) par(mar = c(3, 0.3, 1, 0.5)) # Dendrogram attr(dend,"edgetext") = round(max(hc$height),1) plot(dend, edgePar = list(lty=1, col=c("black","darkgrey")), edge.root=FALSE,horiz=TRUE,axes=TRUE) abline(v=hcut,col="red") text(x=hcut,y=length(hc$height),labels=as.character(round(hcut,1)),col="red",pos=4) colorsnames= brewer.pal(ngrp,"Dark2") groupes=cutree(hc,h=hcut) ttab=table(groupes) # Groups par(mar = c(0.3, 0.3, 1.6, 0.3)) #names.arg=paste("g",ngrp:1,sep="") mp=barplot(as.vector(rev(ttab)),horiz=TRUE,space=0,col=rev(colorsnames),xlim=c(0,max(ttab)+10),axes=FALSE,main="Groups",axisnames=FALSE) text(rev(ttab),mp,as.character(rev(ttab)),col=rev(colorsnames),cex=1.2,pos=4) # Main ACP scatterplot par(mar = c(0.1,0.1, 0.1,0.1)) selscores=pcr$scores[,c(xax,yax)] zi=apply(datac,1,FUN=function(vec)return(sum(vec^2))) cosinus= cbind(selscores[,1]^2 / zi,selscores[,2]^2 / zi) cosinus= cbind(cosinus,apply(cosinus,1,sum)) ww= (cosinus[,wcos])*4 +0.5 # Outliers? Test with median+1.5*IQ # Factor #1 out <- selscores[,1] < median(selscores[,1]) - 1.5 * diff(quantile(selscores[,1],c(0.25,0.75))) out = out | selscores[,1] > median(selscores[,1]) + 1.5 * diff(quantile(selscores[,1],c(0.25,0.75))) # factor #2 out = out | selscores[,2] < median(selscores[,2]) - 1.5 * diff(quantile(selscores[,2],c(0.25,0.75))) out = out | selscores[,2] > median(selscores[,2]) + 1.5 * diff(quantile(selscores[,2],c(0.25,0.75))) plot(selscores,axes=FALSE,main="",xlab="",ylab="",type="n") abline(h=0,col="black") abline(v=0,col="black") points(selscores[!out,1:2],col=(colorsnames[groupes])[!out],cex=ww,pch=16) text(x=selscores[out,1],y=selscores[out,2],labels=dimnames(selscores)[[1]][out],col=(colorsnames[groupes])[out]) box() # Factor 1 par(mar = c(0.1, 0.1, 0.1, 0.1)) ltitle(paste("Factor ",xax, " [",round(pclperc[xax],0),"%]",sep="" ),cex=1.6,ypos=0.4) plotdens(pcr$scores[,c(xax)]) # Factor 2 par(mar = c(0.1, 0.1, 0.1, 0.1)) ltitle(paste("Factor ",yax," [",round(pclperc[yax],0),"%]",sep=""),cex=1.6,ypos=0.4) plotdens(pcr$scores[,c(yax)]) # R logo #plot(0,0,type="n",xlim=c(0,100),ylim=c(0,15),axes=FALSE) #if (Rpowered){ # logo <- read.pnm(system.file("pictures/logo.ppm", package="pixmap")[1]) # addlogo(logo, px=c(100- (101/77)*11,100), py=c(0, 11), asp=1) #} #text(x=100-15,y=c(2,5),pos=2,labels=c("Powered by R ",date())) #box() } confshade2 = function(y, xlo, xhi, col = 8.) { n <- length(y) for(i in 1.:(n - 1.)) { polygon(c(xlo[i], xlo[i + 1.], xhi[i + 1.], xhi[i]), c(y[i], y[ i + 1.], y[i + 1.], y[i]), col = col, border = FALSE) } } confshade=function(x, ylo, yhi, col = 8.) { n <- length(x) for(i in 1.:(n - 1.)) { polygon(c(x[i], x[i + 1.], x[i + 1.], x[i]), c(ylo[i], ylo[i + 1.], yhi[i + 1.], yhi[i]), col = col, border = FALSE) } } plotdens=function(X, npts = 200, range = 1.5, xlab = "", ylab = "", main = "", ...) { dens <- density(X, n = npts) qu <- quantile(X, c(0., 0.25, 0.5, 0.75, 1.)) x <- dens$x y <- dens$y fqux <- x[abs(x - qu[2.]) == min(abs(x - qu[2.]))] fquy <- y[x == fqux] fquX <- as.numeric(qu[2.]) tqux <- x[abs(x - qu[4.]) == min(abs(x - qu[4.]))] tquy <- y[x == tqux] tquX <- as.numeric(qu[4.]) medx <- x[abs(x - qu[3.]) == min(abs(x - qu[3.]))] medy <- y[x == medx] # Prepare les donnees a dessiner # medX <- as.numeric(qu[3.]) dx <- dens$x dy <- dens$y dx2 <- c(dx[dx <= fquX], fquX, dx[(dx > fquX) & (dx <= medX)], medX, dx[ (dx > medX) & (dx <= tquX)], tquX, dx[dx > tquX]) dy2 <- c(dy[dx <= fquX], fquy, dy[(dx > fquX) & (dx <= medX)], medy, dy[(dx > medX) & (dx <= tquX)], tquy, dy[dx > tquX]) IQX <- dx2[(dx2 >= fquX) & (dx2 <= tquX)] # # # Initialise le graphique # # axes(axes = F, xlim = c(min(dx2), max(dx2)), ylim = c(min(dy2), max(d # # Dessine la densite IQy <- dy2[(dx2 >= fquX) & (dx2 <= tquX)] # Trace densit sous IQ # plot(0., 0., xlim = c(min(dx2), max(dx2)), ylim = c(min(dy2), max(dy2)), axes = F, xlab = xlab, ylab = ylab, main = main,type="n", ...) # Ajoute mediane # # confshade(IQX, rep(0., length(IQX)), IQy, col = "#bdfcc9") bdw <- (tquX - fquX)/20. x1 <- c(medX - bdw/2., medX - bdw/2.) x2 <- c(medX + bdw/2., medX + bdw/2.) y1 <- c(0., medy) # Ajoute lignes wiskers # # polygon(c(x1, rev(x2)), c(y1, rev(y1)), col = 0.) lines(x = c(fquX, fquX), y = c(0., fquy)) # Ajoute wiskers # # lines(x = c(tquX, tquX), y = c(0., tquy)) meany <- mean(dy2) IQrange <- tquX - fquX lines(x = c(medX - range * IQrange, fquX), y = c(meany, meany)) lines(x = c(tquX, medX + range * IQrange), y = c(meany, meany)) lines(x = c(medX - range * IQrange, medX - range * IQrange), y = c(meany - (max(dy2) - min(dy2))/8., meany + (max(dy2) - min(dy2))/8.)) # # Ajoute outliers # # lines(x = c(medX + range * IQrange, medX + range * IQrange), y = c(meany - (max(dy2) - min(dy2))/8., meany + (max(dy2) - min(dy2))/8.)) out <- c(X[X < medX - range * IQrange], X[X > medX + range * IQrange]) # # Ajoute les points... # # Ajoute l'axe points(out, rep(meany, length(out)), pch = 5., col = 2.) # Ajoute l'axe # # points(dx2, dy2, pch = ".", type = "l") #return(x = dessinx2, y = dessiny2) axis(1., at = round(c(min(x), fquX, medX, tquX, max(x)), 2.), labels = F, pos = 0.) invisible(list(x = dx2, y = dy2)) } BoxDens=function(data, npts = 200., x = c(0., 100.), y = c(0., 50.), orientation = "paysage", add = TRUE, col = 11., border=FALSE,colline = 1., Fill = TRUE) { dens <- density(data, n = npts) dx <- dens$x dy <- dens$y if(add == FALSE) plot(0., 0., axes = F, main = "", xlim = x, ylim = y, xlab = "", ylab = "") if(orientation == "paysage") { dx2 <- (dx - min(dx))/(max(dx) - min(dx)) * (x[2.] - x[1.]) * 0.98 + x[1.] dy2 <- (dy - min(dy))/(max(dy) - min(dy)) * (y[2.] - y[1.]) * 0.98 + y[1.] seqbelow <- rep(y[1.], length(dx)) if(Fill == T) confshade(dx2, seqbelow, dy2, col = col) if (border==TRUE) points(dx2, dy2, type = "l", col = colline) } else { dy2 <- (dx - min(dx))/(max(dx) - min(dx)) * (y[2.] - y[1.]) * 0.98 + y[1.] dx2 <- (dy - min(dy))/(max(dy) - min(dy)) * (x[2.] - x[1.]) * 0.98 + x[1.] seqleft <- rep(x[1.], length(dy)) if(Fill == T) confshade2(dy2, seqleft, dx2, col = col) if (border==TRUE) points(dx2, dy2, type = "l", col = colline) } polygon(x = c(x[1.], x[2.], x[2.], x[1.]), y = c(y[2.], y[2.], y[1.], y[ 1.]), density = 0.) } data(swiss) # png(file="swiss.png", width=600,height=400) plotacpclust(swiss[,1:5], 1, 3, hcut=48) # dev.off()