Archive

Posts Tagged ‘multivariate analysis’

Plot color symbols upon s.class {ade4} outputs

24 August 2012 1 comment

I am used to perform mutlivariate analyses using the R package ade4 by D. Chessel et al. The package features some wonderful graphical functions like s.class or s.label. Unfortunately, these functions not always allow using different colors or pch values when plotting the scatter diagrams with representation of point classes.
The present post deals with that point and will provide some code allowing to get graphical outputs looking like the one below:

A scatter diagram with classes and different symbols & colors

The functions s.label {ade4} does not allow plotting different symbols for the classes and for that reason I have modified the function and transformed it into s.label.bis. It is:

s.label.bis <- function (dfxy, xax = 1, yax = 2, label = row.names(dfxy), clabel = 1,
    pch = 20, cpoint = if (clabel == 0) 1 else 0, boxes = TRUE,
    neig = NULL, cneig = 2, xlim = NULL, ylim = NULL, grid = TRUE,
    addaxes = TRUE, cgrid = 1, include.origin = TRUE, origin = c(0,
        0), sub = "", csub = 1.25, possub = "bottomleft", pixmap = NULL,
    contour = NULL, area = NULL, add.plot = FALSE, col="black", bg=NULL,cex=par("cex"))
{
    dfxy <- data.frame(dfxy)
    opar <- par(mar = par("mar"))
    on.exit(par(opar))
    par(mar = c(0.1, 0.1, 0.1, 0.1))
    coo <- scatterutil.base(dfxy = dfxy, xax = xax, yax = yax,
        xlim = xlim, ylim = ylim, grid = grid, addaxes = addaxes,
        cgrid = cgrid, include.origin = include.origin, origin = origin,
        sub = sub, csub = csub, possub = possub, pixmap = pixmap,
        contour = contour, area = area, add.plot = add.plot)
    if (!is.null(neig)) {
        if (is.null(class(neig)))
            neig <- NULL
        if (class(neig) != "neig")
            neig <- NULL
        deg <- attr(neig, "degrees")
        if ((length(deg)) != (length(coo$x)))
            neig <- NULL
    }
    if (!is.null(neig)) {
        fun <- function(x, coo) {
            segments(coo$x[x[1]], coo$y[x[1]], coo$x[x[2]], coo$y[x[2]],
                lwd = par("lwd") * cneig)
        }
        apply(unclass(neig), 1, fun, coo = coo)
    }
    if (clabel > 0)
        scatterutil.eti(coo$x, coo$y, label, clabel, boxes)
    if (cpoint > 0 & clabel < 1e-06)
        points(coo$x, coo$y, pch = pch, cex = cex,col=col, bg=bg)
    box()
    invisible(match.call())
}

Similarly I changed the ade4 function s.class into s.class.bis which does not plot the “branches” of the stars but only the centroid of each class. It is:

s.class.bis<-function (dfxy, fac, wt = rep(1, length(fac)), xax = 1, yax = 2,
    cstar = 1, cellipse = 1.5, axesell = TRUE, label = levels(fac),
    clabel = 1, cpoint = 1, pch = 20, col = rep(1, length(levels(fac))),
    xlim = NULL, ylim = NULL, grid = TRUE, addaxes = TRUE, origin = c(0,
        0), include.origin = TRUE, sub = "", csub = 1, possub = "bottomleft",
    cgrid = 1, pixmap = NULL, contour = NULL, area = NULL, add.plot = FALSE)
{
scatterutil.eti.jp<-function (x, y, label, clabel, boxes = TRUE, coul = rep(1, length(x)),
    horizontal = TRUE)
{
    if (length(label) == 0)
        return(invisible())
    if (is.null(label))
        return(invisible())
    if (any(label == ""))
        return(invisible())
    cex0 <- par("cex") * clabel
    for (i in 1:(length(x))) {
        cha <- as.character(label[i])
        cha <- paste(" ", cha, " ", sep = "")
        x1 <- x[i]
        y1 <- y[i]
        xh <- strwidth(cha, cex = cex0)
        yh <- strheight(cha, cex = cex0) * 5/3
        if (!horizontal) {
            tmp <- scatterutil.convrot90(xh, yh)
            xh <- tmp[1]
            yh <- tmp[2]
        }
        if (boxes) {
            rect(x1 - xh/2, y1 - yh/2, x1 + xh/2, y1 + yh/2,
                col = "white", border = coul[i])
        }
        if (horizontal) {
            text(x1, y1, cha, cex = cex0, col = coul[i])
        }
        else {
            text(x1, y1, cha, cex = cex0, col = coul[i], srt = 90)
        }
    }
}
    f1 <- function(cl) {
        n <- length(cl)
        cl <- as.factor(cl)
        x <- matrix(0, n, length(levels(cl)))
        x[(1:n) + n * (unclass(cl) - 1)] <- 1
        dimnames(x) <- list(names(cl), levels(cl))
        data.frame(x)
    }
    opar <- par(mar = par("mar"))
    par(mar = c(0.1, 0.1, 0.1, 0.1))
    on.exit(par(opar))
    dfxy <- data.frame(dfxy)
    if (!is.data.frame(dfxy))
        stop("Non convenient selection for dfxy")
    if (any(is.na(dfxy)))
        stop("NA non implemented")
    if (!is.factor(fac))
        stop("factor expected for fac")
    dfdistri <- f1(fac) * wt
    coul = col
    w1 <- unlist(lapply(dfdistri, sum))
    dfdistri <- t(t(dfdistri)/w1)
    coox <- as.matrix(t(dfdistri)) %*% dfxy[, xax]
    cooy <- as.matrix(t(dfdistri)) %*% dfxy[, yax]
    if (nrow(dfxy) != nrow(dfdistri))
        stop(paste("Non equal row numbers", nrow(dfxy), nrow(dfdistri)))
    coo <- scatterutil.base(dfxy = dfxy, xax = xax, yax = yax,
        xlim = xlim, ylim = ylim, grid = grid, addaxes = addaxes,
        cgrid = cgrid, include.origin = include.origin, origin = origin,
        sub = sub, csub = csub, possub = possub, pixmap = pixmap,
        contour = contour, area = area, add.plot = add.plot)
    if (clabel > 0)
        scatterutil.eti(coox, cooy, label, clabel, coul = col)
    box()
    invisible(match.call())
}

Now let look at the example provided with the ade4 function dudi.pca

library(ade4)
data(deug)
pca1 <- dudi.pca(deug$tab, scale = FALSE, scan = FALSE)

We now construct different vectors that will be necessary later (this code is necessary because of the data structure of deug)

colv<-rep("black",times=length(as.numeric(deug$result)))
pchv<-rep(1,times=length(as.numeric(deug$result)))
cc<-topo.colors(length(unique(as.numeric(deug$result))))

and use these vectors to create new color and pch vectors that will be used for plotting:

for (j in 1:(length(unique(ccn))) ){
   ww<-which(ccn==unique(ccn)[j])
   colv[ww]<-cc[unique(ccn)][j] ; pchv[ww]<-cc2[j]
}

We now plot the usual scatter plot using s.class

s.class(pca1$li,deug$result,grid=F,cgrid=0,csub=2,cellipse=0,col=cc,cpoint = 0)

output of the s.class function

and superimpose the symbols using s.label.bis

s.label.bis(pca1$li, pch=pchv, clabel = 0, col=colv, add.plot = T)

output of the s.label.bis function

The modified s.class.bis function can be used to redraw the class labels :

s.class.bis(pca1$li, deug$result,csub = 2,cellipse=0, col=cc, cpoint = 0,label=levels(deug$result), add.plot = T)

s.label.bis and s.class.bis can also be used to draw color scatter class graphics such as this:

Advertisements