Let’s load the packages and the data

library(tidyverse)  #as usual
## Warning: package 'dplyr' was built under R version 4.0.4
#library(drc)
#library(mgcv)
# Read the CSV into a tibble: the first row supplies the column names, and
# empty strings or the literal "NA" are parsed as missing values.
main_tib <- read_csv(
  file = "AMBT descriptions condensed pred and AUC.csv",
  col_names = TRUE,
  na = c("", "NA")
)
## Parsed with column specification:
## cols(
##   a = col_double(),
##   b = col_double(),
##   c = col_double(),
##   day_28 = col_double(),
##   day_14 = col_double(),
##   pred_14 = col_double(),
##   pred_28 = col_double(),
##   auc_day14 = col_double(),
##   auc_day28 = col_double(),
##   Desc = col_character(),
##   agg = col_character(),
##   ash_type = col_character(),
##   pc_ash = col_double(),
##   pc_Li = col_double(),
##   non_stnd = col_logical(),
##   other_pozz = col_character(),
##   pc_other = col_double()
## )
# Print a compact column-by-column preview of the imported tibble.
glimpse(x = main_tib)
## Rows: 264
## Columns: 17
## $ a          <dbl> 1.73745396, 0.58371200, 0.65310171, 0.03852776, 0.923894...
## $ b          <dbl> 1.1368945, 1.9747051, 2.1743668, 2.2844415, 2.3294426, 2...
## $ c          <dbl> 12.021931, 8.896275, 11.739560, 7.100294, 7.579872, 11.5...
## $ day_28     <dbl> 1.25760000, 0.54880000, 0.58293333, 0.03600000, 0.906933...
## $ day_14     <dbl> 0.94293333, 0.41360000, 0.38133333, 0.03200000, 0.727466...
## $ pred_14    <dbl> 0.94376137, 0.41443504, 0.38831476, 0.03178739, 0.745384...
## $ pred_28    <dbl> 1.25681247, 0.52876332, 0.56738922, 0.03692077, 0.881874...
## $ auc_day14  <dbl> 7.85300059, 2.95911411, 2.37285639, 0.23889074, 5.404677...
## $ auc_day28  <dbl> 23.5866543, 9.7670038, 9.3586957, 0.7312627, 17.0981228,...
## $ Desc       <chr> "Placitas", "Spratt", "Placitas 100%", "Spratt 100%", "P...
## $ agg        <chr> "Placitas", "Spratt", "Placitas", "Spratt", "Placitas", ...
## $ ash_type   <chr> "None", "None", "None", "None", "None", "None", "None", ...
## $ pc_ash     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ pc_Li      <dbl> 0, 0, 100, 100, 75, 75, 50, 50, 0, 0, 100, 100, 0, 0, 75...
## $ non_stnd   <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, ...
## $ other_pozz <chr> "None", "None", "None", "None", "None", "None", "None", ...
## $ pc_other   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
# Change to a base data frame for easier subsetting in the upcoming for loop.
# as.data.frame() alone performs the conversion; wrapping its result in
# data.frame() again only repeated the work.
main_df <- as.data.frame(main_tib)
# Row count of the converted data frame.
nrow(main_df)
## [1] 264
# t_time <- main_df[,1]
# t_time
# Derived columns:
#   Fail - 1 when day_28 is at least 0.10, otherwise 0
#   ac   - column a divided by column c
main_df$Fail <- with(main_df, ifelse(day_28 >= 0.10, 1, 0))
main_df$ac <- with(main_df, a / c)
# Show the first six rows, including the two new columns.
head(main_df, n = 6L)
##            a        b         c    day_28    day_14    pred_14    pred_28
## 1 1.73745396 1.136895 12.021931 1.2576000 0.9429333 0.94376137 1.25681247
## 2 0.58371200 1.974705  8.896275 0.5488000 0.4136000 0.41443504 0.52876332
## 3 0.65310171 2.174367 11.739560 0.5829333 0.3813333 0.38831476 0.56738922
## 4 0.03852776 2.284441  7.100294 0.0360000 0.0320000 0.03178739 0.03692077
## 5 0.92389495 2.329443  7.579872 0.9069333 0.7274667 0.74538480 0.88187483
## 6 0.13833522 2.364063 11.545871 0.1216000 0.0856000 0.08465779 0.12316605
##   auc_day14  auc_day28          Desc      agg ash_type pc_ash pc_Li non_stnd
## 1 7.8530006 23.5866543      Placitas Placitas     None      0     0    FALSE
## 2 2.9591141  9.7670038        Spratt   Spratt     None      0     0    FALSE
## 3 2.3728564  9.3586957 Placitas 100% Placitas     None      0   100    FALSE
## 4 0.2388907  0.7312627   Spratt 100%   Spratt     None      0   100    FALSE
## 5 5.4046778 17.0981228  Placitas 75% Placitas     None      0    75    FALSE
## 6 0.4980486  2.0222805    Spratt 75%   Spratt     None      0    75    FALSE
##   other_pozz pc_other Fail         ac
## 1       None        0    1 0.14452369
## 2       None        0    1 0.06561308
## 3       None        0    1 0.05563255
## 4       None        0    0 0.00542622
## 5       None        0    1 0.12188793
## 6       None        0    1 0.01198136

Let’s plot the data

# Base scatter of auc_day28 against pred_28. Each variant below adds points
# (optionally coloured by another column) and a smoother with its error band.
p1 <- ggplot(data = main_df, mapping = aes(x = pred_28, y = auc_day28))
p1 +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points coloured by the Fail flag; axes limited to x in [0, 0.2], y in [0, 5].
# NOTE(review): scale limits drop out-of-range rows before the smoother is
# fitted (see the "Removed ... rows" warnings), so the curve is refit on the
# remaining points. coord_cartesian() would zoom without dropping data, if
# that is the intent.
p1 +
  geom_point(mapping = aes(colour = as.factor(Fail))) +
  geom_smooth(se = TRUE) +
  lims(x = c(0, 0.2), y = c(0, 5))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 93 rows containing non-finite values (stat_smooth).
## Warning: Removed 93 rows containing missing values (geom_point).

# Points coloured by agg, full range.
p1 +
  geom_point(mapping = aes(colour = agg)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points coloured by agg; axes limited to x in [0, 0.75], y in [0, 15].
p1 +
  geom_point(mapping = aes(colour = agg)) +
  geom_smooth(se = TRUE) +
  lims(x = c(0, 0.75), y = c(0, 15))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).

# Points coloured by ash_type, full range.
p1 +
  geom_point(mapping = aes(colour = ash_type)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points coloured by ash_type; same limited axes as above.
p1 +
  geom_point(mapping = aes(colour = ash_type)) +
  geom_smooth(se = TRUE) +
  lims(x = c(0, 0.75), y = c(0, 15))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (stat_smooth).

## Warning: Removed 21 rows containing missing values (geom_point).

# Points coloured by pc_ash, full range.
p1 +
  geom_point(mapping = aes(colour = pc_ash)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points coloured by pc_ash; same limited axes as above.
p1 +
  geom_point(mapping = aes(colour = pc_ash)) +
  geom_smooth(se = TRUE) +
  lims(x = c(0, 0.75), y = c(0, 15))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (stat_smooth).

## Warning: Removed 21 rows containing missing values (geom_point).

# Points coloured by pc_Li, full range.
p1 +
  geom_point(mapping = aes(colour = pc_Li)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points coloured by pc_Li; same limited axes as above.
p1 +
  geom_point(mapping = aes(colour = pc_Li)) +
  geom_smooth(se = TRUE) +
  lims(x = c(0, 0.75), y = c(0, 15))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (stat_smooth).

## Warning: Removed 21 rows containing missing values (geom_point).

# Base scatter of column a against day_28, first uncoloured and then with
# the points coloured by agg, ash_type, pc_ash and pc_Li in turn.
p2 <- ggplot(data = main_df, mapping = aes(x = day_28, y = a))
p2 +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p2 +
  geom_point(mapping = aes(colour = agg)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p2 +
  geom_point(mapping = aes(colour = ash_type)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p2 +
  geom_point(mapping = aes(colour = pc_ash)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p2 +
  geom_point(mapping = aes(colour = pc_Li)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Base scatter of column c against day_28, first uncoloured and then with
# the points coloured by agg, ash_type, pc_ash and pc_Li in turn.
p3 <- ggplot(data = main_df, mapping = aes(x = day_28, y = c))
p3 +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p3 +
  geom_point(mapping = aes(colour = agg)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p3 +
  geom_point(mapping = aes(colour = ash_type)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p3 +
  geom_point(mapping = aes(colour = pc_ash)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

p3 +
  geom_point(mapping = aes(colour = pc_Li)) +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Column c against day_28, one plot per value of agg
# (Placitas, Greywacke, Spratt, Texas).
pPl <- main_df %>%
  subset(agg == "Placitas") %>%
  ggplot(mapping = aes(x = day_28, y = c))
pPl +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

pGrey <- main_df %>%
  subset(agg == "Greywacke") %>%
  ggplot(mapping = aes(x = day_28, y = c))
pGrey +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

pSpr <- main_df %>%
  subset(agg == "Spratt") %>%
  ggplot(mapping = aes(x = day_28, y = c))
pSpr +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

pTx <- main_df %>%
  subset(agg == "Texas") %>%
  ggplot(mapping = aes(x = day_28, y = c))
pTx +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Column a against day_28, one plot per value of agg
# (Placitas, Greywacke, Spratt, Texas). The plot objects are reassigned here.
pPl <- main_df %>%
  subset(agg == "Placitas") %>%
  ggplot(mapping = aes(x = day_28, y = a))
pPl +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

pGrey <- main_df %>%
  subset(agg == "Greywacke") %>%
  ggplot(mapping = aes(x = day_28, y = a))
pGrey +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

pSpr <- main_df %>%
  subset(agg == "Spratt") %>%
  ggplot(mapping = aes(x = day_28, y = a))
pSpr +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

pTx <- main_df %>%
  subset(agg == "Texas") %>%
  ggplot(mapping = aes(x = day_28, y = a))
pTx +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Column c against day_28, one plot per value of agg, restricted to rows
# whose ash_type is "None". The plot objects are reassigned here.
pPl <- main_df %>%
  subset(agg == "Placitas" & ash_type == "None") %>%
  ggplot(mapping = aes(x = day_28, y = c))
pPl +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

pGrey <- main_df %>%
  subset(agg == "Greywacke" & ash_type == "None") %>%
  ggplot(mapping = aes(x = day_28, y = c))
pGrey +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

pSpr <- main_df %>%
  subset(agg == "Spratt" & ash_type == "None") %>%
  ggplot(mapping = aes(x = day_28, y = c))
pSpr +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

pTx <- main_df %>%
  subset(agg == "Texas" & ash_type == "None") %>%
  ggplot(mapping = aes(x = day_28, y = c))
pTx +
  geom_point() +
  geom_smooth(se = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Let’s tabulate counts with janitor’s tabyl

library(janitor)
## Warning: package 'janitor' was built under R version 4.0.4
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
# One-way frequency table of the agg column (counts and proportions).
t1 <- tabyl(main_df, agg)
# Append a totals row, round the percent column to three decimals using
# half-to-even rounding, and render the result with knitr::kable().
t1 %>%
  adorn_totals(where = "row") %>%
  adorn_rounding(digits = 3, rounding = "half to even", percent) %>%
  knitr::kable()
agg n percent
Black aggregate 1 0.004
DE Coarse 11 0.042
DE Sand 5 0.019
Greywacke 56 0.212
Hylas 11 0.042
Minnesota 8 0.030
Placitas 93 0.352
Spratt 44 0.167
Texas 34 0.129
Virginia 1 0.004
Total 264 1.000
# Two-way cross-tabulation: agg in rows, ash_type in columns.
t2 <- tabyl(main_df, agg, ash_type)
# Append both a totals row and a totals column, then render with kable().
t2 %>%
  adorn_totals(where = c("row", "col")) %>%
  knitr::kable()
agg F1 F2 F3 F4 None Total
Black aggregate 0 0 0 0 1 1
DE Coarse 3 3 0 0 5 11
DE Sand 0 0 0 0 5 5
Greywacke 18 18 0 0 20 56
Hylas 1 1 0 0 9 11
Minnesota 3 2 0 0 3 8
Placitas 26 21 0 0 46 93
Spratt 8 7 1 1 27 44
Texas 8 8 0 0 18 34
Virginia 0 0 0 0 1 1
Total 67 60 1 1 135 264