From ASTM C1293 Precision Statement

“13.1.2 Average Expansion Greater Than 0.014 %—The multi-laboratory coefficient of variation of a single test result (mean of measurements of three prisms) for average expansion greater than 0.014 % has been found to be 23 % (CSA A23.2-14A-00).5 Therefore, results of two properly conducted tests in different laboratories on the same aggregate should not differ from each other by more than 65 % of their average, nineteen times out of twenty.”

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'purrr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
library(sn)
## Warning: package 'sn' was built under R version 4.1.3
library(gganimate)
## Warning: package 'gganimate' was built under R version 4.1.3
library(gifski)
## Warning: package 'gifski' was built under R version 4.1.3
library(png)
## Warning: package 'png' was built under R version 4.1.1

Data Range

We’ll work from 0.04% to 0.50% expansion.

x1 <- sequence(1540, from = 400, by = 3)/10000
head(x1)
## [1] 0.0400 0.0403 0.0406 0.0409 0.0412 0.0415
tail(x1)
## [1] 0.5002 0.5005 0.5008 0.5011 0.5014 0.5017
yEquality <- x1
yHigh <- x1 * 1.963
yLow <- x1 * 0.509
ds1 <- data.frame(x = x1,yEq = yEquality, yH = yHigh, yL = yLow)
p1 <- ggplot(ds1,aes(x1,yEq))
p2 <- p1 + geom_line(linewidth = 1.5) + geom_line(aes(y=yH), color = "Red") + 
  geom_line(aes(y=yL), color = "Red") +
  labs(title ="ASTM C1293 Precision - Given 1st Result, Range in 2nd Result", 
       x = "%Expansion", y = "%Expansion") +
      theme_bw()
p2

Skew-Normal Distribution

Let’s try and find a skew-normal distribution to fit this spread, as it could make for a neat plot to have the areas between the lines filled with a scatter plot to enhance the ablity to see the possibilities indicated by the plot.

We’ll try and calculate a distribution around 0.5% expansion.

First we’ll make a global seed value:

seed1 <- 5
cp <- c(mean=.54, s.d.=0.162, gamma1=0.7)

dp <- cp2dp(cp, family="SN")

dp
##        xi     omega     alpha 
## 0.3493106 0.2502128 3.2259801
set.seed(seed1)

y_samp <- rsn(200000, dp=dp)

str(y_samp)
##  num [1:200000] 0.653 0.655 0.714 0.415 0.428 ...
##  - attr(*, "family")= chr "SN"
##  - attr(*, "parameters")= Named num [1:4] 0.349 0.25 3.226 0
##   ..- attr(*, "names")= chr [1:4] "xi" "omega" "alpha" ""
hist(y_samp, breaks = 25)

# plot(density(y_samp, bw = 0.3))
y_samp_df <- data.frame(y=y_samp)
ggplot(y_samp_df, aes(x = y)) +
  geom_density()

That looks okay (I played around with mean and gamma1 till I got a decent shape).

Let’s define a function to get one sample from a calculated distribution.

dp_samp <- function(y_mode) {
  y_mean <- y_mode*1.08
  cp_temp <- c(mean=y_mean, s.d.=0.3*y_mode, gamma1=0.7)
  dp_temp <- cp2dp(cp_temp, family="SN")
  rsn(1, dp=dp_temp)
}
#sd and gamma1 adjusted to better fit the 95% lines

Now let’s use this function to generate samples of the skewed distribution as a function of x1:

set.seed(seed1)
y1 <- x1 %>% map_dbl(dp_samp)

Now let’s make a function for calculating a ratioed difference above or below the line of equality for use in coloring the points:

#ratio_diff <- function(y, diff1) {
diff1 <- y1  
for (i in seq_along(y1)) {
   if (y1[i] > x1[i]) {
    diff1[i] <- (y1[i]-x1[i])/(0.963*x1[i])
    } else {
      diff1[i] <- (x1[i]-y1[i])/(0.491*x1[i])
    } 
  }  #return(diff1)
  
#}
#dummy1 <- y1
#diff1 <- ratio_diff(y1, dummy1)
#diff1 (and others to follow) is for coloring the points based on distance from equality
glimpse(y1)
##  num [1:1540] 0.0516 0.0521 0.0569 0.0347 0.0359 ...
head(y1)
## [1] 0.05155769 0.05207370 0.05690529 0.03471081 0.03593863 0.04814826
tail(y1)
## [1] 0.5001234 0.4815128 0.3482640 0.4627871 0.4610032 0.5699237
glimpse(diff1)
##  num [1:1540] 0.3 0.303 0.417 0.308 0.26 ...
head(diff1)
## [1] 0.3000437 0.3033763 0.4170385 0.3081975 0.2600878 0.1663541
tail(diff1)
## [1] 0.0003117497 0.0772635966 0.6203352038 0.1557181815 0.1640895742
## [6] 0.1412099088
glimpse(x1)
##  num [1:1540] 0.04 0.0403 0.0406 0.0409 0.0412 0.0415 0.0418 0.0421 0.0424 0.0427 ...
head(x1)
## [1] 0.0400 0.0403 0.0406 0.0409 0.0412 0.0415
tail(x1)
## [1] 0.5002 0.5005 0.5008 0.5011 0.5014 0.5017

Let’s add this column to the dataframe ds1

ds1 %>% add_column(y1=y1)

Let’s see how it looks:

p3 <- p2 + geom_point(aes(x=x1,y=y1, color=diff1), size = 0.5)
p3

Let’s make a denser plot. We’ll generate a few more samplings, and then overlay these scatterplots.

set.seed(seed1+1)
y2 <- x1 %>% map_dbl(dp_samp)
#diff2 <- abs(y2-x1)
diff2 <- y2  
for (i in seq_along(y2)) {
   if (y2[i] > x1[i]) {
    diff2[i] <- (y2[i]-x1[i])/(0.963*x1[i])
    } else {
      diff2[i] <- (x1[i]-y2[i])/(0.491*x1[i])
    } 
  }
set.seed(seed1+2)
y3 <- x1 %>% map_dbl(dp_samp)
#diff3 <- abs(y3-x1)
diff3 <- y3  
for (i in seq_along(y3)) {
   if (y3[i] > x1[i]) {
    diff3[i] <- (y3[i]-x1[i])/(0.963*x1[i])
    } else {
      diff3[i] <- (x1[i]-y3[i])/(0.491*x1[i])
    } 
  }
set.seed(seed1+3)
y4 <- x1 %>% map_dbl(dp_samp)
#diff4 <- abs(y4-x1)
diff4 <- y4  
for (i in seq_along(y4)) {
   if (y4[i] > x1[i]) {
    diff4[i] <- (y4[i]-x1[i])/(0.963*x1[i])
    } else {
      diff4[i] <- (x1[i]-y4[i])/(0.491*x1[i])
    } 
  }
set.seed(seed1+4)
y5 <- x1 %>% map_dbl(dp_samp)
#diff5 <- abs(y5-x1)
diff5 <- y5  
for (i in seq_along(y5)) {
   if (y5[i] > x1[i]) {
    diff5[i] <- (y5[i]-x1[i])/(0.963*x1[i])
    } else {
      diff5[i] <- (x1[i]-y5[i])/(0.491*x1[i])
    } 
  }
# now we'll add these as columns in ds1
ds1 %>% add_column(y2=y2, y3=y3, y4=y4, y5=y5, diff1=diff1, diff2=diff2, diff3=diff3, diff4=diff4, diff5=diff5)

Let’s see how it looks:

p4 <- p3 + geom_point(aes(x=x1,y=y2,color=diff2), size = 0.5) +
  geom_point(aes(x=x1,y=y3,color=diff3), size = 0.5) +
  geom_point(aes(x=x1,y=y4,color=diff4), size = 0.5) +
  geom_point(aes(x=x1,y=y5,color=diff5), size = 0.5) +
  scale_color_gradient(high ="#FF3366", low ="#333333", guide="none") +
  annotate("text", x = 0.2, y = .6, label = "Upper 95% limit") +
  annotate("text", x = 0.4, y = .08, label = "Lower 95% limit") +
  annotate("text", x = 0.15, y = .94, label = "(Random sampling of 7700 points)") +
  annotate("text", x = 0.15, y = 1.1, label = "(1st result is anywhere on the center line)")
p4

ggsave("ASTM C1293 Precision Plot4.pdf")
## Saving 7 x 5 in image
ggsave("ASTM C1293 Precision Plot4.png")
## Saving 7 x 5 in image

Let’s check a histogram and density plot with ggplot2 for the original vector we made:

#first we need a data frame
xDummy <- sequence(200000, from = 1, by = 1)
ds_Dummy <- data.frame(x = xDummy,y = y_samp)
pHist <- ggplot(ds_Dummy, aes(x=y))
  
pHist + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

pHist + geom_density()

Now let’s make a series of graphs suitable for a slide show:

#As we move backwards to make the succesive graphs, first we'll remove the points

p3_b <- p2 + geom_point(aes(x=x1,y=y1, color=diff1), size = 0.5, alpha = 0)
p4_b <- p3_b + geom_point(aes(x=x1,y=y2,color=diff2), size = 0.5, alpha = 0) +
  geom_point(aes(x=x1,y=y3,color=diff3), size = 0.5, alpha = 0) +
  geom_point(aes(x=x1,y=y4,color=diff4), size = 0.5, alpha = 0) +
  geom_point(aes(x=x1,y=y5,color=diff5), size = 0.5, alpha = 0) +
  scale_color_gradient(high ="#FF3366", low ="#333333", guide="none") +
  annotate("text", x = 0.2, y = .6, label = "Upper 95% limit") +
  annotate("text", x = 0.4, y = .08, label = "Lower 95% limit") +
  annotate("text", x = 0.15, y = .94, label = "(Random sampling of 7700 points)") +
  annotate("text", x = 0.15, y = 1.1, label = "(1st result is anywhere on the center line)")
p4_b

ggsave("ASTM C1293 Precision Plot3.pdf")
## Saving 7 x 5 in image
ggsave("ASTM C1293 Precision Plot3.png")
## Saving 7 x 5 in image
p4_c <- p3_b + geom_point(aes(x=x1,y=y2,color=diff2), size = 0.5, alpha = 0) +
  geom_point(aes(x=x1,y=y3,color=diff3), size = 0.5, alpha = 0) +
  geom_point(aes(x=x1,y=y4,color=diff4), size = 0.5, alpha = 0) +
  geom_point(aes(x=x1,y=y5,color=diff5), size = 0.5, alpha = 0) +
  scale_color_gradient(high ="#FF3366", low ="#333333", guide="none") +
  annotate("text", x = 0.2, y = .6, label = "Upper 95% limit") +
  annotate("text", x = 0.4, y = .08, label = "Lower 95% limit") +
  #annotate("text", x = 0.15, y = .94, label = "(Random sampling of 7700 points)") +
  annotate("text", x = 0.15, y = 1.1, label = "(1st result is anywhere on the center line)")
p4_c

ggsave("ASTM C1293 Precision Plot2.pdf")
## Saving 7 x 5 in image
ggsave("ASTM C1293 Precision Plot2.png")
## Saving 7 x 5 in image
p2_d <- p1 + geom_line(size = 1.5) + geom_line(aes(y=yH), color = "Red", alpha = 0) + 
  geom_line(aes(y=yL), color = "Red", alpha = 0) +
  labs(title ="ASTM C1293 Precision - Given 1st Result, Range in 2nd Result", 
       x = "%Expansion", y = "%Expansion") +
      theme_bw()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## i Please use `linewidth` instead.
p3_d <- p2_d + geom_point(aes(x=x1,y=y1, color=diff1), size = 0.5, alpha = 0)
p4_d <- p3_d + geom_point(aes(x=x1,y=y2,color=diff2), size = 0.5, alpha = 0) +
  geom_point(aes(x=x1,y=y3,color=diff3), size = 0.5, alpha = 0) +
  geom_point(aes(x=x1,y=y4,color=diff4), size = 0.5, alpha = 0) +
  geom_point(aes(x=x1,y=y5,color=diff5), size = 0.5, alpha = 0) +
  scale_color_gradient(high ="#FF3366", low ="#333333", guide="none") +
  #annotate("text", x = 0.2, y = .6, label = "Upper 95% limit") +
  #annotate("text", x = 0.4, y = .08, label = "Lower 95% limit") +
  #annotate("text", x = 0.15, y = .94, label = "(Random sampling of 7700 points)") +
  annotate("text", x = 0.15, y = 1.1, label = "(1st result is anywhere on the center line)")
p4_d

ggsave("ASTM C1293 Precision Plot1.pdf")
## Saving 7 x 5 in image
ggsave("ASTM C1293 Precision Plot1.png")
## Saving 7 x 5 in image

Let’s Try gganimate

str(ds1)
## 'data.frame':    1540 obs. of  4 variables:
##  $ x  : num  0.04 0.0403 0.0406 0.0409 0.0412 0.0415 0.0418 0.0421 0.0424 0.0427 ...
##  $ yEq: num  0.04 0.0403 0.0406 0.0409 0.0412 0.0415 0.0418 0.0421 0.0424 0.0427 ...
##  $ yH : num  0.0785 0.0791 0.0797 0.0803 0.0809 ...
##  $ yL : num  0.0204 0.0205 0.0207 0.0208 0.021 ...
# First we'll make one long vector
x_long <- c(x1,x1,x1,x1,x1)
str(x_long)
##  num [1:7700] 0.04 0.0403 0.0406 0.0409 0.0412 0.0415 0.0418 0.0421 0.0424 0.0427 ...
y_long <- c(y1,y2,y3,y4,y5)
str(y_long)
##  num [1:7700] 0.0516 0.0521 0.0569 0.0347 0.0359 ...
ds2 <- data.frame(x_long = x_long, y_long = y_long)
str(ds2)
## 'data.frame':    7700 obs. of  2 variables:
##  $ x_long: num  0.04 0.0403 0.0406 0.0409 0.0412 0.0415 0.0418 0.0421 0.0424 0.0427 ...
##  $ y_long: num  0.0516 0.0521 0.0569 0.0347 0.0359 ...
pa1 <- ggplot() + geom_line(data=ds1, aes(x=x, y=yEq), linewidth = 1.5) +
  geom_line(data=ds1, aes(x=x, y=yH), color = "Red") + 
  geom_line(data=ds1, aes(x=x, y=yL), color = "Red") +
  labs(title ="ASTM C1293 Precision - Given 1st Result, Range in 2nd Result", 
       x = "%Expansion", y = "%Expansion") +
      theme_bw() +
  geom_point(data=ds2, aes(x=x_long, y=y_long), size = 0.5, alpha = 0.5) +
  annotate("text", x = 0.2, y = .6, label = "Upper 95% limit") +
  annotate("text", x = 0.4, y = .08, label = "Lower 95% limit") +
  annotate("text", x = 0.15, y = .94, label = "(Random sampling of 7700
           points)") +
  annotate("text", x = 0.15, y = 1.1, label = "(1st result is anywhere on the center line)")
pa1

# pa1 + transition_reveal(x) +
#   exit_fade(alpha = 1) +
#   shadow_wake(wake_length = 1, size = NULL, alpha = NULL, wrap = FALSE)
#p4 + transition_layers(x1)  #+
#   exit_fade(alpha = 1) +
#   shadow_wake(wake_length = 1, size = NULL, alpha = NULL, wrap = FALSE)

Let’s try ordering the values

ds2_ordered <- ds2[order(x_long), ]
str(ds2_ordered)
## 'data.frame':    7700 obs. of  2 variables:
##  $ x_long: num  0.04 0.04 0.04 0.04 0.04 0.0403 0.0403 0.0403 0.0403 0.0403 ...
##  $ y_long: num  0.0516 0.0304 0.063 0.0352 0.0382 ...
gpa1 <- ggplot() + geom_line(data=ds1, aes(x=x, y=yEq), linewidth = 1.5) +
  geom_line(data=ds1, aes(x=x, y=yH), color = "Red") +
  geom_line(data=ds1, aes(x=x, y=yL), color = "Red") +
  labs(title ="ASTM C1293 Precision - Given 1st Result, Range in 2nd Result",
       x = "%Expansion", y = "%Expansion") +
      theme_bw() +
  geom_point(data=ds2_ordered, aes(x=x_long, y=y_long, group=1), size = 0.5,
             alpha = 0.5) +
  annotate("text", x = 0.2, y = .6, label = "Upper 95% limit") +
  annotate("text", x = 0.4, y = .08, label = "Lower 95% limit") +
  annotate("text", x = 0.15, y = .94, label = 
  "(Random sampling of 3000 points)") +
  annotate("text", x = 0.15, y = 1.1, 
           label = "(1st result is anywhere on the center line)") +
  transition_reveal(x_long) + exit_fade(alpha = 1) +
  shadow_wake(wake_length = 1, size = NULL, alpha = NULL, wrap = FALSE)
# gpa1 + transition_reveal(x_long) +
#   exit_fade(alpha = 1) +
#   shadow_wake(wake_length = 1, size = NULL, alpha = NULL, wrap = FALSE)
animate(gpa1, nframes=3000, fps = 50, end_pause=300)