################################################################################ ### Appendix 4. Beale et al. 2007 ### ### ### ### A sampling function to efficiently sample points with a given separation ### ### distance estimated, for example, by the scale of spatial autocorrelation ### ### in a dataset. This is not identical to the program described in Hawkins ### ### et al. 2007, but is a far more computationally efficient approximation. ### ################################################################################ hexGrid <- function (mat = y1, ACsize = 8) { ### Function to select a random set of cells within a matrix that form the ### points of a fully packed hexagonal grid, each cell at least ACsize apart. ### Requires a matrix, mat, for which indices need to be calculated and an ### integer, ACsize, describing the minimum distance between selected points. ### ### The function works by first selecting a random starting location within ### the minimum distance of the top left corner, defining a regular, horizontal ### grid from this point, then rotating the grid through a randomly selected ### angle. dims <- dim (mat) # calculate dimensions of the matrix. x1 <- sample (1:ACsize, 1) # select random starting x value. y1 <- sample (1:ACsize, 1) # select random starting y value. xs1 <- seq(x1, dims[1] * sqrt(2), by = ACsize) # generate a sequence of x values each ACsize # away from the provious point. x2 <- x1 - floor(0.5 * ACsize) # locate the x value half way between the first # two points. xs2 <- seq(x2, dims[1] * sqrt(2), by = ACsize) # generate a sequence of x values half-way # between the previous sequence. Ystep <- ceiling (sqrt ((ACsize^2) - ((0.5 * ACsize)^2))) # calculate the vertical distance between points. lenY <- ceiling (2*dims[2] * sqrt(2) / Ystep) # estimate the length of the y values needed, # including sufficient buffer to allow # rotation. ys <- seq(y1 - lenY * Ystep/2, dims[2] * sqrt(2), by = Ystep) # generate a sequence of y values each Ystep # apart. yInds <- sort(c(rep((1:ceiling(lenY/2))*2, each = length(xs2)), # generate the indices of y values needed to rep((((1:ceiling(lenY/2)))*2)-1, each = length(xs1)))) # match the length of the x values. locs <- cbind(rep(c(xs1,xs2), ceiling(lenY/2)), ys[yInds]) # combine all x and y values into a matrix # defining the horizontal grid. locs <- locs[!is.na(rowSums(locs)),] # remove any rows where x and y values are # not within the required dimensions. ang <- sample(seq(0, pi/2, length = 90), 1) # generate a random angle bewteen 0 and pi/2. Dlocs <- locs # copy the location matrix. Dlocs[,1] <- round(cos(ang) * locs[,1] - sin(ang) * locs[,2]) # rotate the x values by ang. Dlocs[,2] <- round(sin(ang) * locs[,1] + cos(ang) * locs[,2]) # rotate the y values by ang. Dlocs <- Dlocs[Dlocs[,1] %in% 1:dims[1],] # remove any x values outside the required # dimensions. Dlocs <- Dlocs[Dlocs[,2] %in% 1:dims[2],] # remove any y values outside the required # dimensions. return(Dlocs) # return the matrix of rotated locations. } ################################################################################ ### Functions to calculate Moran's I efficiently on a matrix of values. ### ### ### ### Calculations are performed with moranI for raw data, or moranIresid for ### ### residuals, which are called by the main function matrixMoran. For ### ### repeated calculations of Moran's I on multiple datasets of the same ### ### grid size matrixMoranSetUp will calculate several slow preliminaries on ### ### the first dataset that can be passed to all subsequent analyses with ### ### matrixMoran. ### ### written C. Beale, July 2007. ### ################################################################################ ################################################################################ ### Setup function. ### ################################################################################ matrixMoranSetUp <- function (mat = y1, equal.lag.dist = FALSE, nbins = 10) { ### Function to calculate bins and distances on a grid that can be passed to ### matrixMoran when making repeated calculations on the same dimension matrix. ### requires a matrix or data.frame (mat) with the same dimensions as the matrix ### that all subsequent analyses will be carried out on. equal.lag.dist is a ### logical parameter indicating whether the distances between bins for which ### Moran's I will be calculated should be equal or should attempt to equalise ### the sample size within bins. nbins indicates the number of bins for ### calculating Moran's I. size <- dim (mat) # calculate dimensions. coords <- cbind(rep(1:size[1], times = size[2]), rep(1:size[2], each = size[1])) # generate coordinates for matrix locations. dists <- dist(coords) # calculate geographic distances between # matrix locations. bins <- as.matrix(dist(coords, diag = F, upper = T)) # create matrix to hold distance categories. dists1 <- bins # copy trangular distance matrix. if(!equal.lag.dist) { # If equal.lag.dist = FALSE, calculate distances cuts <- quantile(as.vector(dists)/max(dists, na.rm = T), seq(0.1, 1, # that generate bins with approximately equal length.out = nbins)) # numbers of squares. for (i in nbins:1) bins[dists1/max(dists1, na.rm = T)<=cuts[i]] <- i # fill matrix of bins with appropriate values # based on distance categories. } else { # If equal.lag.dist = TRUE, calculate distances cuts <- seq(min(dists, na.rm = T), max(dists, na.rm = T), length.out = # as a regularly sized sequence of length nbins) # nbins. for (i in nbins:1) bins[dists1<=cuts[i]] <- i # fill matrix of bins with appropriate values # based on distance categories. } return(list(bins = bins, size = size, cuts = cuts)) } ################################################################################ ### The main function. ### ################################################################################ matrixMoran <- function (mat = y1, equal.lag.dist = FALSE, nbins = 10, plot.it = FALSE, useStartUp = Null, resids = FALSE) { ### The main function calcualting Moran's I for a matrix. mat is the matrix or ### data.frame containing the values of interest. equal.lag.dist is a logical ### parameter indicating whether the distances between bins for which Moran's I ### will be calculated should be equal or should attempt to equalise ### the sample size within bins. nbins indicates the number of bins for ### calculating Moran's I. plot.it is a logical flag indicatign whether the ### correlogram should be plotted after calculation. useStartUp will accept an ### object returned by matrixMoranSetUp to speed up repeated calculations. ### resids is a logical flag indicating whether the matrix of values are ### residuals from a model or raw values. if (is.null(useStartUp)) { # if setup values for bins, size and distance useStartUp <- matrixMoranSetUp(mat = mat, equal.lag.dist = equal.lag.dist, # categories are not provided, calculate nbins = 10) # these. } bins <- useStartUp$bins size <- useStartUp$size cuts <- useStartUp$cuts allpairs <- cbind(rep(as.vector(mat), times = prod(size)), rep(as.vector(mat), # create a matrix with all pairs of values. each = prod(size))) out <- matrix(ncol = 2, nrow = nbins) # create a matrix to hold output values. colnames(out) <- c("dist", "I") for (i in 1: nbins) { # for each bin in turn calculate Moran's I using print(paste("starting bin", i)) # either moranIresid if resids = TRUE or if(resids) { # moranI if resids = FALSE. out[i,2] <- moranIresid(y = allpairs, interval = i, W = as.vector(bins == i), n = prod(size)) } else { out[i,2] <- moranI(y = allpairs, interval = i, W = as.vector(bins == i), n = prod(size)) } } out[,1] <- cuts # paste distance categories into output. if(plot.it) { # if plot.it = TRUE, plot the correlogram. plot(cuts, out[,2], xlab = "distance", ylab = "Moran's I") lines(c(min(cuts), max(cuts)), c(0.05/nbins,0.05/nbins)) } return(list (results = out)) } ################################################################################ ### Moran's I calculation for raw data ### ################################################################################ moranI <- function (y = allpairs, interval = 1, W = as.vector(bins == interval), n = prod(size), S = sum(W, na.rm = T)) { yHat = mean(as.vector(y), na.rm = T) Ival <- (n/S) * (sum(((y[,1] - yHat) * (y[,2] - yHat))[W], na.rm = T)) / sum ((y[,1][1:n] - yHat)^2, na.rm = T) return(Ival) } ################################################################################ ### Moran's I calculation for residuals ### ################################################################################ moranIresid <- function (y = allpairs, interval = 1, W = as.vector(bins == interval), n = prod(size), S = sum(W, na.rm = T)) { Ival <- (n/S) * (sum(((y[,1]) * (y[,2]))[W], na.rm = T)) / sum ( (y[,1][1:n])^2, na.rm = T) return(Ival) }