viernes, 10 de diciembre de 2010

Análisis geoestadístico con R. Paquete geoR.

Ordenes básicas del paquete geoR para el análisis de datos geoestadísiticos, recopilados del material "geoR: PAckage for Geostatistical Data Analysis. An illustrative session" por Paulo J. Ribeiro Jr. & Peter J. Diggle, 2006.
Pronto subiré también análisis completos con un conjunto de datos de R así como contenido teórico de este tema. Espero les guste...

################################################################################
### geoR : Package for Geostatistical Data Analysis An illustrative session#####
### Paulo J. Ribeiro Jr. & Peter J. Diggle. 2006 ###############################
### geoRintro_package%examples.pdf #######
##########################################
 
  library(geoR)
  data(s100)
 
# 3) herramientas exploratorias
  #1. gráficos
  summary(s100)
  plot(s100) #para objetos de clase "geodata" produce 4 gráficos


 par(mfrow = c(2, 2))
 points(s100, xlab = "Coord X", ylab = "Coord Y")
 points(s100, xlab = "Coord X", ylab = "Coord Y", pt.divide = "rank.prop")
 points(s100, xlab = "Coord X", ylab = "Coord Y", cex.max = 1.7, col = gray(seq(1, 0.1, l = 100)), pt.divide = "equal")
 points(s100, pt.divide = "quintile", xlab = "Coord X", ylab = "Coord Y")
#2. variograma empírico cloud1 <- variog(s100, option = "cloud", max.dist = 1) cloud2 <- variog(s100, option = "cloud", estimator.type = "modulus", max.dist = 1) bin1 <- variog(s100, uvec = seq(0, 1, l = 11)) bin2 <- variog(s100, uvec = seq(0, 1, l = 11), estimator.type = "modulus") par(mfrow = c(2, 2)) plot(cloud1, main = "classical estimator"); plot(cloud2, main = "modulus estimator") plot(bin1, main = "classical estimator"); plot(bin2, main = "modulus estimator") names(bin1) #los tres primeros resultados indican las distancias, semivarianza estimada y el número de pares para cada bin

 
 bin1 <- variog(s100, uvec = seq(0, 1, l = 11), bin.cloud = T) #los puntos del variograma pueden agruparse en clases de distancias y graficarse en box-plot para cada una
 bin2 <- variog(s100, uvec = seq(0, 1, l = 11), estimator.type = "modulus", bin.cloud = T)
 par(mfrow = c(1, 2))
 plot(bin1, bin.cloud = T, main = "classical estimator"); plot(bin2, bin.cloud = T, main = "modulus estimator")

 
 vario60 <- variog(s100, max.dist = 1, direction = pi/3) #podemos calcular los variogramas direccionales utilizando el argumento "direction" y "tolerance", por ejemplo calculamos el variograma para la direcciónd e 60 grados con el ángulo de tolerancia por default (22.5 grados).
 vario.4 <- variog4(s100, max.dist = 1) #cálculo en 4 direcciones
 par(mfrow = c(1, 2), mar = c(3, 3, 1.5, 0.5))
 plot(vario60); title(main = expression(paste("directional, angle = ", 60 * degree))); plot(vario.4, lwd = 2)

 
# 4) estimación de parámetros
 #comparamos variogramas teóricos y empíricos visualmente
 bin1 <- variog(s100, uvec = seq(0, 1, l = 11))
 plot(bin1)
 lines.variomodel(cov.model = "exp", cov.pars = c(1, 0.3), nugget = 0, max.dist = 1, lwd = 3)
 smooth <- variog(s100, option = "smooth", max.dist = 1, n.points = 100, kernel = "normal", band = 0.2)
 lines(smooth, type = "l", lty = 2)
 legend(0.4, 0.3, c("empirical", "exponential model", "smoothed"), lty = c(1, 1, 2), lwd = c(1, 3, 1))


 #pero en la práctica no conocemos los valores reales y debemos estimarlos: 1) a ojo, o 2) por mínimos cuadrados ajustados al variogramas empíricos (OLS  y WLS), 3) métodos basados en verosimilitud (ML o REML), o 4) métodos bayesianos
 #1) a ojo
 plot(variog(s100, max.dist = 1))
 lines.variomodel(cov.model = "exp", cov.pars = c(1, 0.3), nug = 0, max.dist = 1)
 lines.variomodel(cov.model = "mat", cov.pars = c(0.85, 0.2), nug = 0.1, kappa = 1, max.dist = 1, lty = 2)
 lines.variomodel(cov.model = "sph", cov.pars = c(0.8, 0.8), nug = 0.1, max.dist = 1, lwd = 2)
#función de estimación de parámetro
ml <- likfit(s100, ini = c(1, 0.5)) ; ml ; summary(ml) # Fitting models with nugget fixed to zero ml <- likfit(s100, ini = c(1, 0.5), fix.nugget = T) reml <- likfit(s100, ini = c(1, 0.5), fix.nugget = T, method = "RML") ols <- variofit(bin1, ini = c(1, 0.5), fix.nugget = T, weights = "equal") wls <- variofit(bin1, ini = c(1, 0.5), fix.nugget = T) # Fitting models with a fixed value for the nugget ml.fn <- likfit(s100, ini = c(1, 0.5), fix.nugget = T, nugget = 0.15) reml.fn <- likfit(s100, ini = c(1, 0.5), fix.nugget = T, nugget = 0.15, method = "RML") ols.fn <- variofit(bin1, ini = c(1, 0.5), fix.nugget = T, nugget = 0.15, weights = "equal") wls.fn <- variofit(bin1, ini = c(1, 0.5), fix.nugget = T, nugget = 0.15) # Fitting models estimated nugget ml.n <- likfit(s100, ini = c(1, 0.5), nug = 0.5) reml.n <- likfit(s100, ini = c(1, 0.5), nug = 0.5, method = "RML") ols.n <- variofit(bin1, ini = c(1, 0.5), nugget = 0.5, weights = "equal") wls.n <- variofit(bin1, ini = c(1, 0.5), nugget = 0.5) #graficamos los modelos ajustados sobre los variogramas empírivos par(mfrow = c(1, 3)) plot(bin1, main = expression(paste("fixed ", tau^2 == 0))) lines(ml, max.dist = 1) lines(reml, lwd = 2, max.dist = 1) lines(ols, lty = 2, max.dist = 1) lines(wls, lty = 2, lwd = 2, max.dist = 1) legend(0.5, 0.3, legend = c("ML", "REML", "OLS", "WLS"), lty = c(1, 1, 2, 2), lwd = c(1, 2, 1, 2), cex = 0.7) plot(bin1, main = expression(paste("fixed ", tau^2 == 0.15))) lines(ml.fn, max.dist = 1) lines(reml.fn, lwd = 2, max.dist = 1) lines(ols.fn, lty = 2, max.dist = 1) lines(wls.fn, lty = 2, lwd = 2, max.dist = 1) legend(0.5, 0.3, legend = c("ML", "REML", "OLS", "WLS"), lty = c(1, 1, 2, 2), lwd = c(1, 2, 1, 2), cex = 0.7) plot(bin1, main = expression(paste("estimated ", tau^2))) lines(ml.n, max.dist = 1) lines(reml.n, lwd = 2, max.dist = 1) lines(ols.n, lty = 2, max.dist = 1) lines(wls.n, lty = 2, lwd = 2, max.dist = 1) legend(0.5, 0.3, legend = c("ML", "REML", "OLS", "WLS"), lty = c(1, 1, 2, 2), lwd = c(1, 2, 1, 2), cex = 0.7)
 

#Two kinds of variogram envelopes computed by simulation are illustrated in the figure below.
#The plot on the left-hand side shows an envelope based on permutations of the data values across the locations, i.e. envelopes built under the assumption of no spatial correlation.
#The envelopes shown on the right-hand side are based on simulations from a given set of model parameters, in this example the parameter estimates from the WLS variogram fit. This envelope shows the variability of the empirical variogram.
 env.mc <- variog.mc.env(s100, obj.var = bin1)
 env.model <- variog.model.env(s100, obj.var = bin1, model = wls)
#Profile likelihoods (1-D and 2-D) are computed by the function proflik. Here we show the profile likelihoods for the covariance parameters of the model without nugget effect
 par(mfrow = c(1, 2))
 plot(bin1, envelope = env.mc)
 plot(bin1, envelope = env.model)
 
#5) validación cruzada
 #cross-validation using the leaving-one-out: data points are removed one by one and predicted by kriging using the remaining data.
 #The commands below illustrates cross-validation for the models fitted by maximum likelihood and weighted least squares.
 xv.ml <- xvalid(s100, model = ml)
 xv.wls <- xvalid(s100, model = wls)
 #Graphical results are shown for the cross-validation results where the leaving-one-out strategy combined with the wls estimates for the parameters was used. Cross-validation
    #residuals are obtained subtracting the observed data minus the predicted value. Standardised residuals are obtained dividing by the square root of the prediction variance (‘kriging
    #variance’). By default the 10 plots shown in the Figure 10 are produced but the user can restrict the choice using the function arguments.
 prof <- proflik(ml, geodata = s100, sill.val = seq(0.48, 2, l = 11), range.val = seq(0.1, 0.52, l = 11), uni.only = FALSE)
 par(mfrow = c(1, 3)); plot(prof, nlevels = 16)

 par(mfcol = c(5, 2), mar = c(3, 3, 1, 0.5), mgp = c(1.5, 0.7, 0))
 plot(xv.wls)


 #A variation of this method is illustrated by the next two calls where the model parameters are re-estimated each time a point is removed from the data-set.

 xvR.ml <- xvalid(s100, model = ml, reest = TRUE)
 xvR.wls <- xvalid(s100, model = wls, reest = TRUE, variog.obj = bin1)
 
#6) interpolación espacial. Kriging con las siguientes opciones:
   #1. Simple kriging
   #2. Ordinary kriging
   #3. Trend (universal) kriging
   #4. External trend kriging
 #First example consider the prediction at four locations labeled 1, 2, 3, 4 and indicated in the figure below. The command to perform ordinary kriging using the parameters estimated by weighted least squares with nugget fixed to zero would be:
 kc4 <- krige.conv(s100, locations = loci, krige = krige.control(obj.m = wls))
 #Second example: The goal is to perform prediction on a grid covering the area and to display the results. Again, we use ordinary kriging.
 plot(s100$coords, xlim = c(0, 1.2), ylim = c(0, 1.2), xlab = "Coord X", ylab = "Coord Y")
 loci <- matrix(c(0.2, 0.6, 0.2, 1.1, 0.2, 0.3, 1, 1.1), ncol = 2)
 text(loci, as.character(1:4), col = "red")
 polygon(x = c(0, 1, 1, 0), y = c(0, 0, 1, 1), lty = 2)
 pred.grid <- expand.grid(seq(0, 1, l = 51), seq(0, 1, l = 51))
 kc <- krige.conv(s100, loc = pred.grid, krige = krige.control(obj.m = ml))
 image(kc, loc = pred.grid, col = gray(seq(1, 0.1, l = 30)), xlab = "Coord X", ylab = "Coord Y")
 
# 7) análisis bayesiano #consider a model without nugget and including uncertainty in the mean, sill and range parameters. Prediction at the four locations indicated above is performed by typing a command like: #predictive distributions at the four selected locations. Dashed lines show Gaussian distributions with mean and variance given by results of ordinary #kriging obtained in Section 4. The full lines correspond to the Bayesian prediction. The plot shows results of density estimation using samples from the predictive distributions. bsp4 <- krige.bayes(s100, loc = loci, prior = prior.control(phi.discrete = seq(+ 5, l = 101), phi.prior = "rec"), output = output.control(n.post = 5000)) par(mfrow = c(2, 2)) for (i in 1:4) { kpx <- seq(kc4$pred[i] - 3 * sqrt(kc4$krige.var[i]), kc4$pred[i] + 3 * sqrt(kc4$krige.var[i]), l = 100) kpy <- dnorm(kpx, mean = kc4$pred[i], sd = sqrt(kc4$krige.var[i])) bp <- density(bsp4$predic$simul[i, ]) rx <- range(c(kpx, bp$x)) ry <- range(c(kpy, bp$y)) par(mfrow = c(1, 3), mar = c(3, 3, 1, 0.5), mgp = c(2, 1, 0)) hist(bsp4$posterior$sample$beta, main = "", xlab = expression(beta), prob = T) hist(bsp4$posterior$sample$sigmasq, main = "", xlab = expression(sigma^2), prob = T) hist(bsp4$posterior$sample$phi, main = "", xlab = expression(phi), prob = T) plot(cbind(rx, ry), type = "n", xlab = paste("Location", i), ylab = "density", xlim = c(-4, 4), ylim = c(0, 1.1)) lines(kpx, kpy, lty = 2); lines(bp) }   #Consider now, under the same model assumptions, obtaining simulations from the predictive distributions on a grid of points covering the area. The commands to define the grid and perform Bayesian prediction are: pred.grid <- expand.grid(seq(0, 1, l = 31), seq(0, 1, l = 31)) bsp <- krige.bayes(s100, loc = pred.grid, prior = prior.control(phi.discrete + 5, l = 51)), output = output.control(n.predictive = 2)) plot(bin1, ylim = c(0, 1.5)) lines(bsp4, max.dist = 1.2, summ = mean) lines(bsp4, max.dist = 1.2, summ = median, lty = 2) lines(bsp4, max.dist = 1.2, summ = "mode", post = "par", lwd = 2, lty = 2) legend(0.25, 0.4, legend = c("variogram posterior mean", "variogram posterior median", "parameters posterior mode"), lty = c(1, 2, 2), lwd = c(1, 1, 2), cex = 0.8)   # 8) simulaciones de campos aleatorios gaussianos: (mejor usar paquete RandomFields) #The function grf generates simulations of Gaussian random fields on regular or irregular sets of locations. It relies on the decomposition of the covariance matrix and therfore #won’t work for large number of locations in which case we suggest the usage of the package RandomFields. Some of its functionality is illustrated by the next commands. #mapas obtenidos de la distribución predictiva par(mfrow = c(2, 2), mar = c(3, 3, 1, 0.5), mgp = c(1.5, 0.7, 0)) image(bsp, loc = pred.grid, main = "predicted", col = gray(seq(1, 0.1, l = 30))) image(bsp, val = "variance", loc = pred.grid, main = "prediction variance", col = gray(seq(1, 0.1, l = 30))) image(bsp, val = "simulation", number.col = 1, loc = pred.grid, main = "a simulation from\nthe predictive distribution", col = gray(seq(1, 0.1, l = 30))) image(bsp, val = "simulation", number.col = 2, loc = pred.grid, main = "another simulation from \n the predictive distribution", col = gray(seq(1, 0.1, l = 30))) #resultados de simulación producidos con la función "grf" par(mfrow = c(1, 2)) sim1 <- grf(100, cov.pars = c(1, 0.25)) points.geodata(sim1, main = "simulated locations and values") plot(sim1, max.dist = 1, main = "true and empirical variograms") #simulaciones con diferentes resoluciones par(mfrow = c(1, 2)) sim2 <- grf(441, grid = "reg", cov.pars = c(1, 0.25)) image(sim2, main = "a small-ish simulation", col = gray(seq(1, 0.1, l = 30)))     ######################################################################

3 comentarios:

  1. Hola
    Quisiera saber la diferencia entre "variofit" y "likfit", parece que primero ajusta un modelo al variograma y el segundo a los datos? en ese caso cual es conveniente usar para luego ahcer kriging?
    gracias

    ResponderEliminar
  2. Ups, perdón por la demora para contestarte, no me llegó la notificación del comentario. La principal diferencia entre "variofit" y "likfit" es que: "variofit" ajusta variogramas para mínimos cuadrados (OLS o WLS), mientras que "likfit" estima los parámetros basados en la máxima verosimilitud (ML o REML). Puedes utilizar cualquiera de los dos para hacer el Kriging. Mira en la ayuda (http://rss.acs.unt.edu/Rdoc/library/geoR/html/krige.conv.html): tienes que incluirlo como "obj.model": a list with the model parameters. Typically an output of likfit or variofit.
    Saludos y gracias por participar.

    ResponderEliminar
  3. Hola. Querría saber como comparar dos modelos (exponencial y esferico) mediante los graficos producidos por plot.xvalid. No se como darme cuenta cual es "mejor". muchas gracias

    ResponderEliminar

Libros para descargar (gratis) sobre Modelos Espacio-Temporales