Skip to content
Snippets Groups Projects
Select Git revision
  • e1935581395958a581c4655f6edf4530138aaa7d
  • master default protected
  • beta
  • dev
  • andrewssobral-patch-1
  • update
  • thomas-fork
  • 2.0
  • v3.2.0
  • v3.1.0
  • v3.0
  • bgslib_py27_ocv3_win64
  • bgslib_java_2.0.0
  • bgslib_console_2.0.0
  • bgslib_matlab_win64_2.0.0
  • bgslib_qtgui_2.0.0
  • 2.0.0
  • bgs_console_2.0.0
  • bgs_matlab_win64_2.0.0
  • bgs_qtgui_2.0.0
  • v1.9.2_x86_mfc_gui
  • v1.9.2_x64_java_gui
  • v1.9.2_x86_java_gui
23 results

commons-configuration-1.8.jar

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    04_05_modelling_msdm_rf.R 4.48 KiB
    # Dependencies ----
    library(dplyr)
    library(tidyr)
    library(caret)
    library(ranger)

    # Project helpers (provides predict_new(), used in the evaluation section)
    source("R/utils.R")

    # Loads `model_data` into the session
    load("data/r_objects/model_data_random_abs_extended.RData")

    # Restrict to observations assigned to an evaluation fold and encode the
    # response as a factor. caret::train() with classProbs = TRUE requires
    # syntactically valid factor levels, hence "A"/"P" instead of "0"/"1".
    model_data <- model_data %>%
      dplyr::filter(!is.na(fold_eval)) %>%
      dplyr::mutate(
        species = as.factor(species),
        present_fct = factor(present, levels = c("0", "1"), labels = c("A", "P"))
      )
    
    # ----------------------------------------------------------------------#
    # Train model                                                        ####
    # ----------------------------------------------------------------------#
    # Predictor columns passed to the random forest (environmental covariates
    # plus the species identifier, making this a multi-species model)
    predictors <- c("bio6", "bio17", "cmi", "rsds", "igfc", "dtfw", "igsw", "roughness", "species")

    # Tuning setup is identical for every fold, so define it once outside the
    # loop instead of rebuilding it on each iteration.
    #
    # NOTE: caret::twoClassSummary reports ROC / Sens / Spec only. The original
    # code requested metric = "Accuracy", which twoClassSummary never produces;
    # caret then warns and silently falls back to the first available metric.
    # Tuning is therefore done explicitly on "ROC" below.
    train_ctrl <- caret::trainControl(
      method = "cv",
      number = 5,
      classProbs = TRUE,
      summaryFunction = caret::twoClassSummary,
      savePredictions = "final"
    )

    tune_grid <- expand.grid(
      mtry = c(2, 4, 6, 8),
      splitrule = "gini",
      min.node.size = c(1, 4, 9, 16)
    )

    # Cross validation: one model per evaluation fold, trained on the
    # remaining four folds.
    for (fold in 1:5) {
      ## Preparations #####
      # Training data = everything outside the current evaluation fold;
      # geometry is dropped because caret works on plain data frames.
      data_train <- dplyr::filter(model_data, fold_eval != fold) %>%
        sf::st_drop_geometry()

      # Run model (case-weighted ranger fit, tuned on ROC)
      rf_fit <- caret::train(
        x = data_train[, predictors],
        y = data_train$present_fct,
        method = "ranger",
        metric = "ROC", # was "Accuracy": not computed by twoClassSummary
        trControl = train_ctrl,
        tuneGrid = tune_grid,
        weights = data_train$weight,
        num.threads = 48
      )

      save(rf_fit, file = paste0("data/r_objects/msdm_rf/msdm_rf_fit_random_abs_fold", fold, ".RData"))
    }
    
    # Full model ----
    # Fit on the pooled data from all folds (same tuning routine as the
    # per-fold models above).
    full_data <- model_data %>%
      sf::st_drop_geometry()

    # Define caret training routine.
    # NOTE: twoClassSummary reports ROC / Sens / Spec only, so the tuning
    # metric must be "ROC" — the original "Accuracy" is never produced and
    # caret would warn and fall back to the first available metric.
    train_ctrl <- caret::trainControl(
      method = "cv",
      number = 5,
      classProbs = TRUE,
      summaryFunction = caret::twoClassSummary,
      savePredictions = "final"
    )

    tune_grid <- expand.grid(
      mtry = c(2, 4, 6, 8),
      splitrule = "gini",
      min.node.size = c(1, 4, 9, 16)
    )

    # Run model
    # NOTE(review): unlike the per-fold models, no case weights are passed
    # here — confirm whether the full model should also use full_data$weight.
    rf_fit <- caret::train(
      x = full_data[, predictors],
      y = full_data$present_fct,
      method = "ranger",
      metric = "ROC", # was "Accuracy": not computed by twoClassSummary
      trControl = train_ctrl,
      tuneGrid = tune_grid
    )

    save(rf_fit, file = "data/r_objects/msdm_rf/msdm_rf_fit_random_abs_full.RData")
    
    # ----------------------------------------------------------------------#
    # Evaluate model                                                     ####
    # ----------------------------------------------------------------------#
    # For each evaluation fold: load the fold's fitted model, predict on the
    # held-out data, and compute per-species performance metrics. The result
    # is one long-format tibble (species x fold x metric).
    msdm_rf_random_abs_extended_performance = lapply(1:5, function(fold){
      # Loads `rf_fit` (the model trained without this fold) into this frame
      load(paste0("data/r_objects/msdm_rf/msdm_rf_fit_random_abs_fold", fold, ".RData"))
      
      # Held-out test set: only this fold, excluding background pseudo-absences
      test_data = model_data %>% 
        dplyr::filter(fold_eval == fold, record_type != "absence_background") %>% 
        sf::st_drop_geometry()
      
      # Observed response recoded to the same "A"/"P" levels used in training
      actual = factor(test_data$present, levels = c("0", "1"), labels = c("A", "P"))
      # predict_new() comes from R/utils.R.
      # NOTE(review): the code below assumes `probs` is a single numeric vector
      # of presence probabilities (it is passed directly to pROC::roc) — confirm
      # against the predict_new() implementation.
      probs = predict_new(rf_fit, test_data, type = "prob")
      preds = predict_new(rf_fit, test_data, type = "class")
      
      # One data frame per species, so metrics are computed species-wise
      eval_dfs = data.frame(
        species = test_data$species,
        actual,
        probs,
        preds
      ) %>% 
        group_by(species) %>% 
        group_split()
      
      
      lapply(eval_dfs, function(eval_df_spec){
        species = eval_df_spec$species[1]
        
        # tryCatch: species with degenerate test data (e.g. only one observed
        # class) make roc()/confusionMatrix() fail; record NA metrics instead
        # of aborting the whole evaluation.
        performance = tryCatch({
          # direction = ">" with levels c("P", "A") treats higher `probs` as
          # more likely "P"
          auc = pROC::roc(eval_df_spec$actual, eval_df_spec$probs, levels = c("P", "A"), direction = ">")$auc
          cm = caret::confusionMatrix(eval_df_spec$preds, eval_df_spec$actual, positive = "P")
          
          list(
            AUC = as.numeric(auc),
            Accuracy = cm$overall["Accuracy"],
            Kappa = cm$overall["Kappa"],
            Precision = cm$byClass["Precision"],
            Recall = cm$byClass["Recall"],
            F1 = cm$byClass["F1"],
            # confusionMatrix tables are prediction (rows) x reference (cols)
            TP = cm$table["P", "P"],
            FP = cm$table["P", "A"],
            TN = cm$table["A", "A"],
            FN = cm$table["A", "P"]
          )
        }, error = function(e){
          list(AUC = NA_real_, Accuracy = NA_real_, Kappa = NA_real_, 
               Precision = NA_real_, Recall = NA_real_, F1 = NA_real_, 
               TP = NA_real_, FP = NA_real_, TN = NA_real_, FN = NA_real_)
        })
        
        # Attach identifiers (`!!` forces the outer-scope values, not the
        # data-frame columns of the same name) and pivot to long format.
        # `obs` = number of training observations for this species in this fold.
        # NOTE(review): the trailing comma after `model = ...` relies on dplyr
        # ignoring trailing empty arguments; older dplyr versions error on it.
        performance_summary = performance %>% 
          as_tibble() %>% 
          mutate(
            species = !!species,
            obs = nrow(dplyr::filter(model_data, species == !!species, fold_eval != !!fold)),
            fold_eval = !!fold,
            model = "MSDM_rf_random_abs_extended",
          ) %>% 
          tidyr::pivot_longer(-any_of(c("species", "obs", "fold_eval", "model")), names_to = "metric", values_to = "value") %>% 
          drop_na()
      }) %>% 
        bind_rows()
    }) %>% 
      bind_rows()
    
    save(msdm_rf_random_abs_extended_performance, file = paste0("data/r_objects/msdm_rf_random_abs_extended_performance.RData"))