From 334f9db65d441f732202cc25ef26dfede354ef76 Mon Sep 17 00:00:00 2001 From: dj44vuri <julian.sagebiel@idiv.de> Date: Sat, 14 Oct 2023 21:53:24 +0200 Subject: [PATCH] some improvements in createSets function --- R/createSets.R | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++ R/makesets.R | 55 ---------------------------------------- 2 files changed, 68 insertions(+), 55 deletions(-) create mode 100644 R/createSets.R delete mode 100644 R/makesets.R diff --git a/R/createSets.R b/R/createSets.R new file mode 100644 index 0000000..49731a8 --- /dev/null +++ b/R/createSets.R @@ -0,0 +1,68 @@ +createSets <- function(.data, choice, attributes , uniquerow, prefix="a") { + require("dplyr") + require("tidyr") + require("purrr") + + if (!is.data.frame(.data)) { + stop("The input data (.data) must be a data frame or tibble.") + } + if (!all(c(choice, uniquerow) %in% names(.data))) { + stop("Both choice and uniquerow columns must exist in the input data.") + } + attribute_cols <- select(.data, {{ attributes }}) %>% names() + if (length(setdiff(attribute_cols, names(.data))) > 0) { + stop("Some columns specified in attributes do not exist in the input data.") + } + + if (any(is.na(.data[c(choice, uniquerow, attribute_cols)]))) { + stop("The columns choice, uniquerow, and attributes should not have missing values.") + } + + sets <- .data %>% + select({{ attributes }}, {{ choice }}, {{ uniquerow }} ) %>% + group_by(!!rlang::sym(uniquerow), !!rlang::sym(choice)) %>% + add_count() %>% ungroup %>% + group_by(!!rlang::sym(uniquerow)) %>% + distinct(n, .keep_all=TRUE) %>% + mutate(perc = round((n / sum(n) * 100))) %>% + arrange({{ uniquerow }}, {{ choice }}) %>% + group_split() %>% + set_names(map(., ~ unique(as.character(.x[[rlang::as_string(uniquerow)]])))) + + + + makesets <- function(.data) { + + + .data %>% + pivot_wider( + id_cols = c({{ uniquerow }}, everything()), + names_from = {{ choice }}, + values_from = c(n, perc), + names_sep = "." + ) %>% + select(- {{ uniquerow }}) %>% + rename_with( + ~ gsub(paste0("^(", prefix, "(\\d+))_(.*)$"), "\\3.\\2", .), + matches(paste0("^", prefix, "\\d+_")) + ) %>% + pivot_longer( + cols = everything(), + names_to = c("name", "suffix"), + names_pattern = "(.*)\\.(.*)" + ) %>% + pivot_wider( + names_from = suffix, + values_from = value + ) + } + + finalsets <- map(sets, ~makesets(.x )) + + + + return(finalsets) + +} + +#finalsets2 <- createFreq(database, choice = "pref1", attributes = ends_with(c("ZEIT","x1")), uniquerow = "UniqueRow") diff --git a/R/makesets.R b/R/makesets.R deleted file mode 100644 index e38e80b..0000000 --- a/R/makesets.R +++ /dev/null @@ -1,55 +0,0 @@ -createSets <- function(.data, choice, attributes , uniquerow) { - require("dplyr") - require("tidyr") - require("purrr") - - - - sets <- .data %>% - select({{ attributes }}, {{ choice }}, {{ uniquerow }} ) %>% - group_by(!!rlang::sym(uniquerow), !!rlang::sym(choice)) %>% - add_count() %>% ungroup %>% - group_by(!!rlang::sym(uniquerow)) %>% - distinct(n, .keep_all=TRUE) %>% - mutate(perc = round((n / sum(n) * 100))) %>% - arrange({{ uniquerow }}, {{ choice }}) %>% - group_split() %>% - set_names(map(., ~ unique(as.character(.x[[rlang::as_string(uniquerow)]])))) - - - - makesets <- function(.data) { - - - .data %>% - pivot_wider( - id_cols = c({{ uniquerow }}, everything()), - names_from = {{ choice }}, - values_from = c(n, perc), - names_sep = "." - ) %>% - select(- {{ uniquerow }}) %>% - rename_with( - ~ gsub("^(a([12]))_(.*)$", "\\3.\\2", .), - starts_with("a1") | starts_with("a2") - ) %>% - pivot_longer( - cols = everything(), - names_to = c("name", "suffix"), - names_pattern = "(.*)\\.(.*)" - ) %>% - pivot_wider( - names_from = suffix, - values_from = value - ) - } - - finalsets <- map(sets, ~makesets(.x )) - - - - return(finalsets) - -} - -#finalsets2 <- createFreq(database, choice = "pref1", attributes = ends_with(c("ZEIT","x1")), uniquerow = "UniqueRow") -- GitLab