update · laderast/burro@0d5b654 · GitHub
[go: up one dir, main page]

Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
mypartyparrot committed Jul 1, 2020
1 parent a9e2359 commit 0d5b654
Show file tree
Hide file tree
Showing 10 changed files with 68 additions and 81 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Imports:
Depends:
shiny
Roxygen: list(markdown = TRUE)
RoxygenNote: 6.1.1
RoxygenNote: 7.0.2
Suggests:
testthat
URL: http://github.com/laderast/burro
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export("%>%")
export(build_shiny_app)
export(check_data)
export(explore_data)
import(dplyr)
import(ggplot2)
Expand Down
44 changes: 26 additions & 18 deletions R/check_data.R
Original file line number Diff line number Diff line change
@@ -1,26 +1,29 @@
check_data <- function(dataset){
if(!is.null(covariates)) {
#' Checks the Dataset and sets attributes on it for use in
#'
#' @param dataset
#' @param covariates
#' @param outcome_var
#'
#' @return
#' @export
#'
#' @examples
check_data <- function(dataset, covariates=NULL, outcome_var=NULL){

covariates_in_data <- covariates %in% colnames(dataset)

num_in_data <- which(covariates_in_data)
not_in_data <- which(!covariates_in_data)

if(length(num_in_data) == 0) {
stop("Your covariates aren't in the dataset - make sure they correpond to column names in the data")
}

if(length(not_in_data) > 0) {
warning(
if(length(num_in_data) == 0 & length(covariates) > 0) {
paste0("The following covariates weren't in the dataset:", paste(covariates[not_in_data]))
)
}

}


myDataFrame <- data.table::data.table(dataset)

if(!is.null(covariates)){
covariates <- covariates[covariates_in_data]

if(length(covariates) > 0){
myDataFrame <- myDataFrame[,covariates,with=FALSE]
}

Expand All @@ -34,16 +37,15 @@ check_data <- function(dataset){
in_dataset <- length(which(outcome_var %in% colnames(dataset)))

# need to check column names and outcome var names in data
if(in_dataset == 0){
stop("Your outcome variable is not the dataset - try using colnames(data) to select it")
if(in_dataset == 0 & length(outcome_var) > 0){
warning("Your outcome variable is not the dataset - using all categorical variables as outcome")
}

if(in_dataset < length(outcome_var)){
warning("Some of your outcomes weren't in the dataset")
warning("Some of your outcomes weren't in the dataset - using the ones we found")
}

#myDataFrame <- burro:::sanitize_data_frame(myDataFrame, outcome_var)
remove_categories <- outcome_var

cat_no_outcome <- categoricalVars

if(length(outcome_var) != length(categoricalVars)){
Expand All @@ -53,5 +55,11 @@ check_data <- function(dataset){

numericVars <- sort(burro:::get_numeric_variables(myDataFrame))

attr(myDataFrame, "outcome_var") <- outcome_var
attr(myDataFrame, "cat_no_outcome") <- cat_no_outcome
attr(myDataFrame, "categoricalVars") <- categoricalVars
attr(myDataFrame, "numericVars") <- numericVars

return(myDataFrame)

}
5 changes: 5 additions & 0 deletions R/helper.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ get_numeric_variables <- function(df){
varClass <- sapply(df, class)

numericVars <- names(varClass[varClass %in% c("numeric", "integer", "Date")])

if(length(numericVars) == 0){
numericVars <- NULL
}

return(numericVars)
}

Expand Down
60 changes: 5 additions & 55 deletions R/xplor.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,62 +51,12 @@ explore_data <- function(dataset, covariates=NULL,
#Sys.setlocale("LC_CTYPE", "Chinese")
dataset_name <- deparse(substitute(dataset))

myDataFrame <- check_data(dataset)

if(!is.null(covariates)) {

covariates_in_data <- covariates %in% colnames(dataset)
num_in_data <- which(covariates_in_data)
not_in_data <- which(!covariates_in_data)

if(length(num_in_data) == 0) {
stop("Your covariates aren't in the dataset - make sure they correpond to column names in the data")
}

if(length(not_in_data) > 0) {
warning(
paste0("The following covariates weren't in the dataset:", paste(covariates[not_in_data]))
)
}


}


myDataFrame <- data.table::data.table(dataset)

if(!is.null(covariates)){
myDataFrame <- myDataFrame[,covariates,with=FALSE]
}

categoricalVars <- sort(names(burro:::get_category_variables(myDataFrame)))
outcome_var <- outcome_var[outcome_var %in% categoricalVars]

#todo - just show two way dropdown if outcome_var = NULL
if(is.null(outcome_var)){
outcome_var <- categoricalVars
}

in_dataset <- length(which(outcome_var %in% colnames(dataset)))

# need to check column names and outcome var names in data
if(in_dataset == 0){
stop("Your outcome variable is not the dataset - try using colnames(data) to select it")
}
if(in_dataset < length(outcome_var)){
warning("Some of your outcomes weren't in the dataset")
}

#myDataFrame <- burro:::sanitize_data_frame(myDataFrame, outcome_var)
remove_categories <- outcome_var

cat_no_outcome <- categoricalVars

if(length(outcome_var) != length(categoricalVars)){
cat_no_outcome <-
setdiff(categoricalVars, remove_categories)
}

numericVars <- sort(burro:::get_numeric_variables(myDataFrame))
numericVars <- attr(myDataFrame, "numericVars")
categoricalVars <- attr(myDataFrame, "categoricalVars")
outcome_var <- attr(myDataFrame, "outcome_var")
cat_no_outcome <- attr(myDataFrame, "cat_no_outcome")

ggplot2::theme_set(ggplot2::theme_classic(base_size = 15))
#data_dictionary <- readr::read_csv("data/data_dictionary.csv") %>%
Expand Down
Binary file removed data/smoke_complete.xlsx
Binary file not shown.
17 changes: 17 additions & 0 deletions man/check_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions man/explore_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions tests/testthat/test-get_categorical_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,10 @@ test_that("get category variables works", {
expect_equal(names(cats1), "Species")
expect_equal(NULL, cats4)
})

data1 <- check_data(mtcars)
data2 <- check_data(diamonds)

test_that("check_data works",{

})
5 changes: 0 additions & 5 deletions tests/testthat/test-get_continuous_data.R

This file was deleted.

0 comments on commit 0d5b654

Please sign in to comment.
0