Calculate evaluation measures for multiple models

If you have a list of models created e.g. with the multGLM function, or a data frame with presence-absence data and the corresponding predicted values for a set of species, you can use the multModEv function to get a set of evaluation measures for all models simultaneously. Note that all models must have the same sample size. This function is included in the modEvA package (Barbosa et al. 2014).

multModEv.methods <- 'Prevalence', 'AUC', 'CCR', 'Misclass', 'Sensitivity', 'Specificity', 'Omission', 'Commission', 'PPP', 'NPP', 'UPR', 'OPR', 'PPI', 'PAI', 'kappa', 'TSS', 'NMI', 'OddsRatio', 'HL', 'HL.p', 'RMSE', 'Miller.int', 'Miller.slope', 'Miller.p'
 
multModEv <-
function(models = NULL, obs.data = NULL, pred.data = NULL, measures = multModEv.methods, standardize = FALSE, thresh = 0.5, bin.method = "quantiles", quiet = TRUE) {
  # version 2.0 (24 Jun 2015)
 
  for (m in measures) {
    if (!(m %in% modEvAmethods("multModEv"))) stop(m, " is not a valid measure; type modEvAmethods('multModEv') for available options.")
  }
 
  if (!bin.method %in% modEvAmethods("getBins")) stop("Invalid bin.method; type modEvAmethods('getBins') for available options.")
 
  if (is.null(models)) {
    if (is.null(obs.data) | is.null(pred.data)) stop("You must provide either a list of model object(s) of class 'glm', or a set of obs.data + pred.data with matching dimensions.")
  }  # end if !models
 
  else {
    if (!is.null(obs.data)) message("Argument 'obs.data' ignored in favour of 'models'.")
    if (!is.null(pred.data)) message("Argument 'pred.data' ignored in favour of 'models'.")
 
    n <- sapply(models, function(x) length(resid(x)))
    if(!(all(n == n[1]))) stop("Models must all be based on the same sample size.")
 
    obs.data <- pred.data <- fav.data <- matrix(data = NA, nrow = length(resid(models[[1]])), ncol = length(models), dimnames = list(NULL, names(models)))
    for (m in 1 : length(models)) {
      obs.data[ , m] <- models[[m]]$y
      pred.data[ , m] <- models[[m]]$fitted.values
    }  # end for m
  }  # end if models
 
  n.models <- ncol(obs.data)
  n.measures <- length(measures)
  results <- matrix(NA, nrow = n.models, ncol = n.measures)
  rownames(results) <- colnames(obs.data)
  colnames(results) <- measures
 
  message(n.models, " models to evaluate; ", n.measures,  " measures to calculate for each.")
 
  if (any(measures %in% modEvAmethods("threshMeasures"))) {
    thresh.measures <- measures[measures %in% modEvAmethods("threshMeasures")]
    cat("- using '", thresh, "' as the threshold value to calculate ", paste(thresh.measures, collapse = ", "), "\n", sep = "")  # doesn't quite work with 'message' or 'paste' or with a 'sep'
  }  # this needs to be oustide the 'for' loop
 
  bin.measures <- c("HL", "HL.p", "RMSE")  # ABC, unityRsq
  if (any(measures %in% bin.measures)) {
    bin.measures <- measures[measures %in% bin.measures]
    cat("- using '", bin.method, "' as the bin method to calculate ", paste(bin.measures, collapse = ", "), "\n", sep = "")  # doesn't quite work with 'message' or 'paste' or with a 'sep'
  }  # this needs to be oustide the 'for' loop
 
for (m in 1:n.models) {
 
    message("Evaluating model ", m, "...")
 
    if ("Prevalence" %in% measures)
      results[m, "Prevalence"] <- prevalence(obs.data[ , m])
 
    if ("Evenness" %in% measures)
      results[m, "Evenness"] <- evenness(obs.data[ , m])
 
    if ("AUC" %in% measures)
      results[m, "AUC"] <- AUC(obs = obs.data[ , m], pred = pred.data[ , m], simplif = TRUE)
 
    if (any(measures %in% modEvAmethods("threshMeasures"))) {
      for (m in 1:n.models)  for (tm in thresh.measures) {
        results[m, tm] <- threshMeasures(obs = obs.data[ , m], pred = pred.data[ , m], measures = tm, thresh = thresh, standardize = standardize, simplif = TRUE)
      }; rm(m, tm)
    }  # end if measures in modEvAmethods("threshMeasures")
    # thresh.measures <- optiThresh(obs = obs.data[ , m], pred = pred.data[ , m], plot = FALSE, optimize = "each")
 
    if (any(measures %in% c("HL", "HL.p"))) {
      for (m in 1:n.models) {
        HL <- HLfit(obs = obs.data[ , m], pred = pred.data[ , m], bin.method = bin.method, simplif = TRUE)
        if ("HL" %in% measures)  results[m, "HL"] <- HL$chi.sq
        if ("HL.p" %in% measures)  results[m, "HL.p"] <- HL$p.value
        if ("RMSE" %in% measures)  results[m, "RMSE"] <- HL$RMSE
      }; rm(m)
    }  # end if HL
 
    if (any(measures %in% c("Miller.int", "Miller.slope", "Miller.p"))) {
      for (m in 1:n.models) {
        Miller <- MillerCalib(obs = obs.data[ , m], pred = pred.data[ , m], plot = FALSE)
        if ("Miller.int" %in% measures)
          results[m, "Miller.int"] <- Miller$intercept
        if ("Miller.slope" %in% measures)
          results[m, "Miller.slope"] <- Miller$slope
        if ("Miller.p" %in% measures)
          results[m, "Miller.p"] <- Miller$slope.pvalue
      }; rm(m)
    } # end if Miller
  }  # end for m
 
  if (standardize) {
    colnames(results)[colnames(results) == "kappa"] <- "skappa"
    colnames(results)[colnames(results) == "TSS"] <- "sTSS"
  }  # end if standardize
 
  results <- data.frame(Model = rownames(results), results, row.names = NULL)
  message("Finished!")
  return(results)
}

[presented with Pretty R] Usage examples:

multModEv(obs.data = mydata[ , 2:8], pred.data = mydata[ , 9:15], thresh = “preval”) multModEv(models = mymodels$models) multModEv(models = mymodels$models, thresh = 0.5, measures = c(“AUC”, “CCR”, “Sensitivity”, “TSS”))

REFERENCES: Barbosa A.M., Brown J.A. & Real R. (2014) modEvA – an R package for model evaluation and analysis. R package, version 0.1.

Advertisements

Comment

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s