Skip to content
Snippets Groups Projects
Select Git revision
  • c5c700d1f06ccab31eba7eb72b03195117853661
  • master default protected
  • beta
  • dev
  • andrewssobral-patch-1
  • update
  • thomas-fork
  • 2.0
  • v3.2.0
  • v3.1.0
  • v3.0
  • bgslib_py27_ocv3_win64
  • bgslib_java_2.0.0
  • bgslib_console_2.0.0
  • bgslib_matlab_win64_2.0.0
  • bgslib_qtgui_2.0.0
  • 2.0.0
  • bgs_console_2.0.0
  • bgs_matlab_win64_2.0.0
  • bgs_qtgui_2.0.0
  • v1.9.2_x86_mfc_gui
  • v1.9.2_x64_java_gui
  • v1.9.2_x86_java_gui
23 results

PreProcessor.cpp

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    functions.R 9.46 KiB
    make_md <- function(f=file){
      
      
      
      
      rmarkdown::render("simulation_output.rmd",
                        output_file = paste0(
                          stringr::str_remove_all(
                            file,"parameters_|.R$"),".html"),
                        params = list(file=file)
      )
      
    }
    
    
    sim_all <- function(){
    
      require("stringr")
      
      designfile<-list.files(designpath,full.names = T)
      designname <- str_remove_all(list.files(designpath,full.names = F),
                                   "(.ngd|_|.RDS)")  ## Make sure designnames to not contain file ending and "_", as the may cause issues when replace
      
    if (!exists("destype")) destype="ngene"
    
    tictoc::tic()
    
    all_designs<- purrr::map(designfile, sim_choice,
                             no_sim= nosim,respondents = resps, mnl_U = mnl_U, destype=destype) %>%  ## iterate simulation over all designs
      setNames(designname)
    
    
    time <- tictoc::toc()
    
    print(time)
    
    
    
    powa <- map(all_designs, ~ .x$power)
    
    
    
    
    summaryall <- as.data.frame(purrr::map(all_designs, ~.x$summary)) %>% 
      dplyr::select(!ends_with("vars")) %>% 
      relocate(ends_with(c(".n", "mean","sd", "min" ,"max", "range" , "se" )))
    
    coefall <- map(all_designs, ~ .x$coefs)
    
    pat<-paste0("(",paste(designname,collapse = "|"),").") # needed to identify pattern to be replaced
    
    s<-as.data.frame(coefall) %>% 
      dplyr::select(!matches("pval|run")) %>%
      rename_with(~ sub("est_b", "", .x), everything()) %>%
      #  rename_with(~ sub("est_asc_", "asc", .x), everything()) %>%
      rename_with( ~ paste0(.,"_",stringr::str_extract(.,pat )), everything() ) %>%   # rename attributes for reshape part 1
      rename_with( ~ stringr::str_replace(.,pattern = pat,replacement=""), everything() )  %>% 
      reshape(varying =1:ncol(.), sep = "_"  , direction = "long" ,timevar = "design", idvar = "run" )
    
    
    p=list()
    
    for (att in names(dplyr::select(s,-c("design","run")))) {
      
      p[[att]] <- plot_multi_histogram(s,att,"design")
      
      print(p[[att]])
      
    }
    
    all_designs[["summaryall"]] = summaryall
    all_designs[["graphs"]]=p
    all_designs[["powa"]]=powa
    all_designs[["time"]]=time
    
    
    
    return(all_designs)
    }
    
    
    
    
    readdesign <- function(design = designfile, designtype=destype) {
      
      
      design <- switch(designtype,
                       
                       "ngene" =readr::read_delim(design,delim = "\t",
                                                  escape_double = FALSE,
                                                  trim_ws = TRUE  , 
                                                  col_select = c(-Design, -starts_with("...")) ,
                                                  name_repair = "universal" , show_col_types = FALSE) %>% 
                         filter(!is.na(Choice.situation)),
                       
                       "spdesign" =as.data.frame(readRDS(design)) %>%
                         mutate(Choice.situation=1:n()) %>% 
                         rename_with(~ stringr::str_replace(.,pattern = "_","\\."), everything()), 
                       
                       
                       stop("Invalid value for design. Please provide either 'ngene' or 'spdesign'.")
      )  
      
    } 
    
    
    simulate_choices <- function(data=datadet, utility =utils, setspp) {  #the part in dataset that needs to be repeated in each run
      
    
     
      by_formula <- function(equation){ #used to take formulas as inputs in simulation utility function
        # //! cur_data_all may get deprecated in favor of pick
        # pick(everything()) %>%
        cur_data_all() %>%
          transmute(!!lhs(equation) := !!rhs(equation) )
      } 
       
      cat(" \n does sou_gis exist: ", exists("sou_gis"), "\n")
      
      if (exists("sou_gis") && is.function(sou_gis)) {
        sou_gis()
        
        cat("\n source of gis has been done \n")
      }    
      
      # source("Projects/ValuGaps/code/GIS_data.R")
      
      if(!exists("manipulations")) manipulations=list() ## If no user input on further data manipulations
      
      n=seq_along(1:length(utility))    # number of utility functions
      
      
    
      
      
      cat("\n dataset final_set exists: ",exists("final_set"), "\n")
      
      if(exists("final_set")) data = left_join(data,final_set, by = "ID")   
      
      cat("\n decisiongroups exists: " ,exists("decisiongroups"))  
      
      if(exists("decisiongroups"))  {     ### create a new variable to classify decision groups.
        
        data = mutate(data,group = as.numeric(cut(row_number(), 
                                                  breaks = decisiongroups * n(), 
                                                  labels = seq_along(decisiongroups[-length(decisiongroups)]),
                                                  include.lowest = TRUE)))
        
        print(table(data$group))
      } else {
        
        data$group=1
      }
      
      
      data<- data %>% 
        group_by(ID) %>% 
        mutate(!!! manipulations) 
      
      
      
      subsets<- split(data,data$group)     
      
      subsets <-  map2(.x = seq_along(u),.y = subsets,
                       ~ mutate(.y,map_dfc(u[[.x]],by_formula)))
      
      data <-bind_rows(subsets)           
      
      data<- data %>%       
        rename_with(~ stringr::str_replace(.,pattern = "\\.","_"), everything()) %>% 
        mutate(across(.cols=n,.fns = ~ rgumbel(setspp,loc=0, scale=1), .names = "{'e'}_{n}" ), 
               across(starts_with("V_"), .names = "{'U'}_{n}") + across(starts_with("e_")) ) %>% ungroup() %>% 
        mutate(CHOICE=max.col(.[,grep("U_",names(.))])
        )   %>% 
        as.data.frame()
      
      
      
      
      cat("\n data has been made \n")
      
      cat("\n First few observations \n ") 
      print(head(data))
      cat("\n \n ")   
      return(data)
      
    } 
    
    
    sim_choice <- function(designfile, no_sim=10, respondents=330, mnl_U,utils=u[[1]] ,destype) {
     
      
      require("tictoc")
      require("readr")
      require("psych")
      require("dplyr")          
      require("evd")           
      require("tidyr")
      require("kableExtra")
      require("gridExtra")
      require("stringr")
      require("mixl")
      require("furrr")
      require("purrr")
      require("ggplot2")
      require("formula.tools")  
      require("rlang")
      
    
    
      
      mnl_U <-paste(map_chr(utils,as.character,keep.source.attr = TRUE),collapse = "",";") %>%
        str_replace_all( c( "priors\\[\"" = "" , "\"\\]" = "" ,  "~" = "=", "\\." = "_" , " b" = " @b"  , "V_"="U_", " alt"="$alt"))
      
     cat("mixl \n") 
     cat(mnl_U)
     
     cat("\n Simulation \n")
     
     print(u)
     
      
      
    
      
      
    
    
      
    
      
      estimate_sim <- function(run=1) {         #start loop
        
        cat("This is Run number ", run)
        
        database <- simulate_choices(datadet, utility = utils, setspp=setpp ) 
        
        
       cat("This is the utility functions \n" , mnl_U) 
        
        model<-mixl::estimate(model_spec,start_values = est, availabilities = availabilities, data= database,)
        
        return(model)   
        
      }
       
      designs_all <- list() 
      
     
     design<- readdesign(design = designfile)
      
     if (!exists("design$Block")) design$Block=1 
      nsets<-nrow(design)        
      nblocks<-max(design$Block)
      setpp <- nsets/nblocks      # Choice Sets per respondent; in this 'no blocks' design everyone sees all 24 sets
    
      replications <- respondents/nblocks
    
      
     
      
      datadet<- design %>%
        arrange(Block,Choice.situation) %>% 
        slice(rep(row_number(), replications)) %>%    ## replicate design according to number of replications
        mutate(ID = rep(1:respondents, each=setpp)) %>%  # create Respondent ID.
        relocate(ID,`Choice.situation`) %>% 
    as.data.frame()
      
      
      
      
      
    database <- simulate_choices(data=datadet, utility = utils, setspp = setpp) 
      
    model_spec <- mixl::specify_model(mnl_U, database, disable_multicore=F)
      
    est=setNames(rep(0,length(model_spec$beta_names)), model_spec$beta_names)
      
      
    availabilities <- mixl::generate_default_availabilities(
        database, model_spec$num_utility_functions)
      
    
    output<- 1:no_sim %>% map(estimate_sim)
     
      
      
       
    coefs<-map(1:length(output),~summary(output[[.]])[["coefTable"]][c(1,8)]  %>%
                   tibble::rownames_to_column() %>%
                   pivot_wider(names_from = rowname, values_from = c(est, rob_pval0)) ) %>% 
        bind_rows(.id = "run")
      
      output[["summary"]] <-psych::describe(coefs[,-1], fast = TRUE)
      
      output[["coefs"]] <-coefs
      
      pvals <- output[["coefs"]] %>% dplyr::select(starts_with("rob_pval0"))
      
      output[["power"]] <- 100*table(apply(pvals,1,  function(x) all(x<0.05)))/nrow(pvals)
      
      
      output[["metainfo"]] <- c(Path = designfile, NoSim = no_sim, NoResp =respondents)
      
      
      print(kable(output[["summary"]],digits = 2, format = "rst"))
      
      
      print(output[["power"]])
      
      
      return(output)  
      
      
    }
    
    
    plot_multi_histogram <- function(df, feature, label_column) { #function to create nice multi histograms, taken somewhere from the web
      plt <- ggplot(df, aes(x=eval(parse(text=feature)), fill=eval(parse(text=label_column)))) +
        #geom_histogram(alpha=0.7, position="identity", aes(y = ..density..), color="black") +
        geom_density(alpha=0.5) +
        geom_vline(aes(xintercept=mean(eval(parse(text=feature)))), color="black", linetype="dashed", linewidth=1) +  ## this makes a vertical line of the mean
        labs(x=feature, y = "Density")
      plt + guides(fill=guide_legend(title=label_column))
    } 
    
    
    
    
    
    download_and_extract_zip <- function(url, dest_folder = ".", zip_name = NULL) {
      # If zip_name is not provided, extract it from the URL
      if (is.null(zip_name)) {
        zip_name <- basename(url)
      }
      folder <- paste0(dest_folder,"/data")
    
      
        # Check if the folder is empty
        if (length(list.files(folder)) > 0) {
          warning("Destination folder is not empty. Nothing copied.")
          return(invisible(NULL))
        }
      
      
      # Download the zip file
      download.file(url, zip_name, method = "auto", quiet = FALSE, mode = "w", cacheOK = TRUE)
      
      # Extract the contents
      unzip(zip_name, exdir = dest_folder)
      
      
      # Return the path to the extracted folder
      return(file.path(dest_folder, tools::file_path_sans_ext(zip_name)))
    }