User Tools

Site Tools


wiki:rstat:rparale

Parallel R using foreach


Material prepared by Stephen Weston

The object of the exercise is to calculate the Moran's I index (using R) inside the polygons of the poly_fr_10poly.shp shape file.

http://www.spatial-ecology.net/ost4sem/lecture/pr.pdf
https://cran.r-project.org/web/packages/foreach/vignettes/foreach.pdf




for this exercise you can enter in this directory

cd /home/user/ost4sem/exercise/basic_adv_gdalogr/

and download this scripts

wget https://raw.githubusercontent.com/selvaje/spatial-ecology-codes/master/wiki/R/moran_par1.R
wget https://raw.githubusercontent.com/selvaje/spatial-ecology-codes/master/wiki/R/moran_par2.R
wget https://raw.githubusercontent.com/selvaje/spatial-ecology-codes/master/wiki/R/moran_par3.R
wget https://raw.githubusercontent.com/selvaje/spatial-ecology-codes/master/wiki/R/moran_par4.R
wget https://raw.githubusercontent.com/selvaje/spatial-ecology-codes/master/wiki/R/moran_seq1.R
wget https://raw.githubusercontent.com/selvaje/spatial-ecology-codes/master/wiki/R/moran_seq2.R


 cd /home/user/ost4sem/exercise/basic_adv_gdalogr/fagus_sylvatica/*10poly.*  /home/user/ost4sem/exercise/basic_adv_gdalogr/


Sequential for loop

wiki/R/moran_seq1.R
suppressMessages(library(raster))
suppressMessages(library(rgdal))
 
# Returns "RasterLayer"
rasterD <- raster('fagus_sylvatica/2020_TSSP_IP-ENS-A2-SP20_43023435.tif')
 
# Returns "SpatialPolygonsDataFrame"
s <- readOGR('poly_fr_10poly.shp', 'poly_fr_10poly')
npoly <- nrow(s)
 
# Sequential for loop
Moran <- double(npoly)
for (i in 1:npoly) {
  p <- s[i,]                                # SpatialPolygonsDataFrame
  rasterDpMask <- mask(rasterD, p)          # RasterLayer
  Moran[i] <- Moran(rasterDpMask)
}
res <- data.frame(id=s@data$id, Moran=Moran)
print(res)

Insert the xy crop in the Sequential for loop. The mask command now is performed on smaller raster dataset

wiki/R/moran_seq2.R
suppressMessages(library(raster))
suppressMessages(library(rgdal))
 
# Returns "RasterLayer"
rasterD <- raster('fagus_sylvatica/2020_TSSP_IP-ENS-A2-SP20_43023435.tif')
 
# Returns "SpatialPolygonsDataFrame"
s <- readOGR('poly_fr_10poly.shp', 'poly_fr_10poly')
npoly <- nrow(s)
 
# Sequential for loop
Moran <- double(npoly)
for (i in 1:npoly) {
  p <- s[i,]                                # SpatialPolygonsDataFrame
  rasterDp <- crop(rasterD, p, snap='out')  # RasterLayer
  rasterDpMask <- mask(rasterDp, p)         # RasterLayer
  Moran[i] <- Moran(rasterDpMask)
}
res <- data.frame(id=s@data$id, Moran=Moran)
print(res)

Foreach loop that "sends" the full shape file and the full raster to the workers

wiki/R/moran_par1.R
suppressMessages(library(raster))
suppressMessages(library(rgdal))
suppressMessages(library(doParallel))
suppressMessages(library(itertools))
 
nw <- detectCores()
cl <- makePSOCKcluster(nw)
registerDoParallel(cl)
 
# Returns "RasterLayer"
rasterD <- raster('fagus_sylvatica/2020_TSSP_IP-ENS-A2-SP20_43023435.tif')
 
# Returns "SpatialPolygonsDataFrame"
s <- readOGR('poly_fr_10poly.shp', 'poly_fr_10poly')
npoly <- nrow(s)
 
# Simplistic parallel foreach loop
Moran <-
  foreach(i=1:npoly, .combine='c',
          .packages=c('raster', 'rgdal')) %dopar% {
    p <- s[i,]
    rasterDp <- crop(rasterD, p, snap='out')
    rasterDpMask <- mask(rasterDp, p)
    Moran(rasterDpMask)
  }
res <- data.frame(id=s@data$id, Moran=Moran)
print(res)

Foreach loop that "sends" only one polygon per task and the full raster to the workers

wiki/R/moran_par2.R
suppressMessages(library(raster))
suppressMessages(library(rgdal))
suppressMessages(library(doParallel))
suppressMessages(library(itertools))
 
nw <- detectCores()
cl <- makePSOCKcluster(nw)
registerDoParallel(cl)
 
# Returns "RasterLayer"
rasterD <- raster('fagus_sylvatica/2020_TSSP_IP-ENS-A2-SP20_43023435.tif')
 
# Returns "SpatialPolygonsDataFrame"
s <- readOGR('poly_fr_10poly.shp', 'poly_fr_10poly')
npoly <- nrow(s)
 
# Parallel, iterating over rows themselves 
Moran <-
  foreach(p=isplitRows(s, chunkSize=1), .combine='c',
          .packages=c('raster', 'rgdal')) %dopar% {
    rasterDp <- crop(rasterD, p, snap='out')
    rasterDpMask <- mask(rasterDp, p)
    Moran(rasterDpMask)
  }
res <- data.frame(id=s@data$id, Moran=Moran)
print(res)

Foreach loop that "sends" a chunk of polygons and the full raster to the workers

wiki/R/moran_par3.R
suppressMessages(library(raster))
suppressMessages(library(rgdal))
suppressMessages(library(doParallel))
suppressMessages(library(itertools))
 
nw <- detectCores()
cl <- makePSOCKcluster(nw)
registerDoParallel(cl)
 
# Returns "RasterLayer"
rasterD <- raster('fagus_sylvatica/2020_TSSP_IP-ENS-A2-SP20_43023435.tif')
 
# Returns "SpatialPolygonsDataFrame"
s <- readOGR('poly_fr_10poly.shp', 'poly_fr_10poly')
npoly <- nrow(s)
 
# Parallel, one task chunk per worker
Moran <-
  foreach(ss=isplitRows(s, chunks=nw), .combine='c',
          .packages=c('raster', 'rgdal')) %dopar% {
    npoly <- nrow(ss)
    Moran <- double(npoly)
    for (i in 1:npoly) {
      rasterDp <- crop(rasterD, ss[i,], snap='out')
      rasterDpMask <- mask(rasterDp, ss[i,])
      Moran[i] <- Moran(rasterDpMask)
    }
    Moran
  }
res <- data.frame(id=s@data$id, Moran=Moran)
print(res)

Identify polygon groups, using the centroid in kmean cluster

Foreach loop that “sends” polygons that belong to the same cluster and a cropped raster (rasterDp) to the workers

wiki/R/moran_par4.R
suppressMessages(library(raster))
suppressMessages(library(rgdal))
suppressMessages(library(doSNOW))
 
nw <- parallel::detectCores()
cl <- makeSOCKcluster(nw)
registerDoSNOW(cl)
 
# Simple parallel kmeans
pkmeans <- function(x, centers, iter.max=10, nstart=getDoParWorkers()) {
  # Combine function returns best result seen so far
  best <- function(...) {
    results <- list(...)
    i <- sapply(results, function(result) result$tot.withinss)
    results[[which.min(i)]]
  }
 
  # Perform a kmeans on each worker by splitting up nstart
  foreach(ns=idiv(nstart, chunks=getDoParWorkers()),
          .combine='best', .multicombine=TRUE) %dopar% {
    kmeans(x, centers, iter.max=iter.max, nstart=ns)
  }
}
 
# Return polygons by cluster
getcluster <- function(spdf, centers) {
  # Assume each feature has a single polygon
  npoly <- nrow(spdf)
  if (npoly > centers) {
    x <- t(sapply(1:npoly, function(i) spdf[i,]@polygons[[1]]@labpt))
    pkmeans(x, centers=centers)$cluster
  } else {
    1:npoly
  }
}
 
ipolygons <- function(spdf, f) {
  it <- isplit(1:nrow(spdf), f)
 
  nextEl <- function() {
    x <- nextElem(it)
    list(value=spdf[x$value,], key=x$key)
  }
 
  object <- list(nextElem = nextEl)
  class(object) <- c('abstractiter', 'iter')
  object
}
 
print(proc.time())
 
cat('readOGR\n')
s <- readOGR('clc06_c311/clc06_c311.shp', 'clc06_c311')
print(proc.time())
 
cat('kmeans\n')
clusters <- getcluster(s, 1000 * nw)
print(proc.time())
 
cat('Moran\'s I\n')
pb <- txtProgressBar(max=length(unique(clusters)), style=3)
progress <- function(n) setTxtProgressBar(pb, n)
opts <- list(progress=progress)
 
res <-
  foreach(ssl=ipolygons(s, clusters), .combine='rbind', .inorder=FALSE,
          .options.snow=opts, .errorhandling='remove',
          .packages=c('raster', 'rgdal')) %dopar% {
    lrasterD <- raster('2020_TSSP_IM-ENS-A2-SP20_43023435.tif')
    ss <- ssl$value
    rasterDp <- crop(lrasterD, ss, snap='out')
    npoly <- nrow(ss)
    Moran <- double(npoly)
    for (i in 1:npoly) {
      x <- crop(rasterDp, ss[i,], snap='out')
      rasterDpMask <- mask(x, ss[i,])
      Moran[i] <- Moran(rasterDpMask)
    }
    data.frame(id=ss@data$id, cluster=ssl$key[[1]], Moran=Moran)
  }
 
cat(sprintf('\nGot %d results\n', nrow(res)))
print(proc.time())
wiki/rstat/rparale.txt · Last modified: 2019/04/05 20:22 by new_ost4sem_admin