Code
library(tidyverse)
library(sn)
library(latex2exp)library(tidyverse)
library(sn)
library(latex2exp)# Using the sn package, create the skew normal for mean of 1 (and adjustments made elsewhere)
adj_mean_1260 <- 0.046 # 0.135 [C1293 ---> (23% 1s%,65% d2s%)]
adj_sd_1260 <- 0.077 # 0.141 [C1293] --> gamma=0.53
gamma_1260 <- 0.43 #[C1293] --> gamma=0.53
cp_one <- c(mean=1+adj_mean_1260,
s.d.=.152+adj_sd_1260,
gamma1=gamma_1260) # gamma1=0.53 [C1293]
dp_one <- cp2dp(cp_one, family="SN")
skew_one <- makeSECdistr(dp_one, family="SN", name="Skew_1")
upper_test <- (2+43/100)/(2-43/100)
lower_test <- (2-43/100)/(2+43/100)
upper_test[1] 1.547771
lower_test[1] 0.6460905
# Let's plot
# Need data frame
dum_count <- seq(1,100000,1) # 100,000 samples
samp_one <- rsn(n=100000, dp=dp_one)
df_skew_one <- data.frame(x=dum_count, y=samp_one)
# Let's get some quantiles
q1_lower <- quantile(df_skew_one$y,probs=c(0.025))
q1_upper <- quantile(df_skew_one$y,probs=c(0.975))
# Let's turn into usable text
q1_text_lower <- sprintf("%.3f%% (at 2.5%%)", q1_lower)
q1_text_upper <- sprintf("%.3f%% (at 97.5%%)", q1_upper)
# Make the plot
sp_one <- ggplot(df_skew_one, aes(x=y)) + geom_density(adjust = 3)
sp_one + xlim(0,3) + theme_classic() +
labs(x=TeX(r"(%Expansion of Second Result ($Y_2$); First Result ($Y_1$) is 1%)"),
y="Density",
title="Fitted Skew Normal Distribution \nwith nominal Mean=1 and CV%=15.2%") +
theme(axis.text=element_text(color="black", size=12),
axis.title=element_text(size=14)) +
geom_vline(xintercept=q1_lower, color="black",
linetype = "dotted") +
geom_vline(xintercept=q1_upper, color="black",
linetype = "dotted") +
annotate("text", x = 0.3, y = 0.85, size=5,
label = q1_text_lower) +
annotate("text", x = 2.1, y = 0.85, size=5,
label = q1_text_upper)#Let's make another at mean=0.5 to test it
cp_one_half <- c(mean=0.5+adj_mean_1260*0.5,
s.d.=.152*0.5+adj_sd_1260*0.5,
gamma1=gamma_1260) # gamma1=0.53 [C1293]
dp_one_half <- cp2dp(cp_one_half, family="SN")
skew_one_half <- makeSECdistr(dp_one_half, family="SN", name="Skew_1_half")
upper_test_half <- 0.5*(2+43/100)/(2-43/100)
lower_test_half <- 0.5*(2-43/100)/(2+43/100)
upper_test_half[1] 0.7738854
lower_test_half[1] 0.3230453
samp_one_half <- rsn(n=100000, dp=dp_one_half)
df_skew_one_half <- data.frame(x=dum_count,
y=samp_one_half)
# Let's get some quantiles
q1_lower_half <- quantile(df_skew_one_half$y,
probs=c(0.025))
q1_upper_half <- quantile(df_skew_one_half$y,
probs=c(0.975))
# Let's turn into usable text
q1_text_lower_half <- sprintf("%.3f%% (at 2.5%%)",
q1_lower_half)
q1_text_upper_half <- sprintf("%.3f%% (at 97.5%%)",
q1_upper_half)
# Make the plot
sp_one_half <- ggplot(df_skew_one_half, aes(x=y)) + geom_density(adjust = 3)
sp_one_half + xlim(0,1.5) + theme_classic() +
labs(x=TeX(r"(%Expansion of Second Result ($Y_2$); First Result ($Y_1$) is 0.5%)"),
y="Density",
title="Fitted Skew Normal Distribution \nwith nominal Mean=0.5 and CV%=15.2%") +
theme(axis.text=element_text(color="black", size=12),
axis.title=element_text(size=14)) +
geom_vline(xintercept=q1_lower_half, color="black",
linetype = "dotted") +
geom_vline(xintercept=q1_upper_half, color="black",
linetype = "dotted") +
annotate("text", x = 0.3, y = 0.85, size=5,
label = q1_text_lower_half) +
annotate("text", x = 1.0, y = 0.85, size=5,
label = q1_text_upper_half)For a test method whose results follow the trend of a constant CV% (e.g., ASTM C1260 and ASTM C1293), we’ll construct a function to create the dp parameters (i.e., create a dp object) for a given mean and CV% that we can use to sample the distributions generated by the sn package.
To use it, we pass in the mean we’re interested in, the CV%, the adjustment for the mean (at a mean=1), the adjustment to the standard deviation (at a mean of 1), and the gamma value.
create_dp <- function(mean_par, cv_par, adj_m, adj_sd, gamma_par){
dist_mean <- mean_par
dist_sd <- cv_par/100*dist_mean
cp_dist <- c(mean=dist_mean+adj_m*dist_mean,
sd=dist_sd+adj_sd*dist_mean,
gamma=gamma_par)
dp_par <- cp2dp(cp=cp_dist, family="SN")
}This function will return a vector of length 2 with the upper and lower bounds based on the values of \(Y_1\) (the value from the first laboratory) and the CV% that is passed into it.
up_lo <- numeric(length = 2)
bounds_upper_lower <- function(first_y, cv_pc){
up_lo[1] <- first_y*(2+cv_pc/100)/(2-cv_pc/100)
up_lo[2] <- first_y*(2-cv_pc/100)/(2+cv_pc/100)
return(up_lo)
}
# Let's try it out
first_y_bounds <- bounds_upper_lower(1, 43)
first_y_bounds[1] 1.5477707 0.6460905
We’ll create a plot on a 2D map, with the x-axis for C1293 expansion vs C1260 expansion on the y-axis.
# Let's make a data frame of three points
x_ex <- c(0.14, 0.2, 0.3)
y_ex <- c(0.5, 0.75, 0.37)
df_ex <- data.frame(x_ex=x_ex, y_ex=y_ex)
# Need another data frame for the fig.3 plot
x_fig3_upper <- c(0.04, 0.3)
y_fig3_upper <- c(0.1, 1)
x_fig3_lower <- c(0.04, 0.6)
y_fig3_lower <- c(0.1, 0.6)
df_fig3 <- data.frame(x_up=x_fig3_upper, y_up=y_fig3_upper,
x_lo=x_fig3_lower, y_lo=y_fig3_lower)
p_fig3 <- ggplot() + geom_line(data=df_fig3, aes(x=x_up, y=y_up)) +
geom_line(data=df_fig3, aes(x=x_lo, y=y_lo))
p_fig3 + geom_point(data=df_ex, aes(x=x_ex, y=y_ex),
size=3) + theme_classic()We’ll try the “first” result of C1293 expansion of 0.30% and a C1260 expansion of 0.37%.
x1_c1293 <- 0.40 # initial C1293 value
y1 <- 0.50 # initial C1260 value
# C1293 first
# adj_mean_one <- 0.046 # 0.135 [C1293 ---> (23% 1s%,65% d2s%)]
# adj_sd_one <- 0.077 # 0.141 [C1293] --> gamma=0.53
adj_mean_1293 <- 0.135
adj_sd_1293 <- 0.141
gamma_1293 <- 0.53 #[C1293] --> gamma=0.53
mean_1293_1 <- 0.30
dp_1293_1 <- create_dp(mean_1293_1, 23, adj_mean_1293,
adj_sd_1293, gamma_1293)
# now let's sample the specific C1293 distribution
num_samp_1 <- 5000
samp_1293_1 <- rsn(n=num_samp_1, dp=dp_1293_1)
# now let's sample the specific C1260 distribution
mean_1260_1 <- 0.37
dp_1260_1 <- create_dp(mean_1260_1, 15.2, adj_mean_1260,
adj_sd_1260, gamma_1260)
samp_1260_1 <- rsn(n=num_samp_1, dp=dp_1260_1)
# let's do the other example points
mean_1293_2 <- 0.14
dp_1293_2 <- create_dp(mean_1293_2, 023, adj_mean_1293,
adj_sd_1293, gamma_1293)
samp_1293_2 <- rsn(n=num_samp_1, dp=dp_1293_2)
mean_1260_2 <- 0.5
dp_1260_2 <- create_dp(mean_1260_2, 15.2, adj_mean_1260,
adj_sd_1260, gamma_1260)
samp_1260_2 <- rsn(n=num_samp_1, dp=dp_1260_2)
mean_1293_3 <- 0.20
dp_1293_3 <- create_dp(mean_1293_3, 23, adj_mean_1293,
adj_sd_1293, gamma_1293)
samp_1293_3 <- rsn(n=num_samp_1, dp=dp_1293_3)
mean_1260_3 <- 0.75
dp_1260_3 <- create_dp(mean_1260_3, 15.2, adj_mean_1260,
adj_sd_1260, gamma_1260)
samp_1260_3 <- rsn(n=num_samp_1, dp=dp_1260_3)df_cloud_1 <- data.frame(x=samp_1293_1, y=samp_1260_1)
# Let's make the other data frames
df_cloud_2 <- data.frame(x=samp_1293_2, y=samp_1260_2)
df_cloud_3 <- data.frame(x=samp_1293_3, y=samp_1260_3)
# let's calculate the out of bounds values
out <- numeric(length=num_samp_1)
# head(out)
# str(out)
# out <- 0
# head(out)
# str(out)
df_cloud_1$out <- out
for(i in 1:nrow(df_cloud_1)){
if(df_cloud_1$x[i] > mean_1293_1*
(2+0.65)/(2-0.65) |
df_cloud_1$x[i] < mean_1293_1*
(2-0.65)/(2+0.65) |
df_cloud_1$y[i] > mean_1260_1*
(2+0.43)/(2-0.43) |
df_cloud_1$y[i] < mean_1260_1*
(2-0.43)/(2+0.43)) {
df_cloud_1$out[i] <- 1 # out of bounds
}
}
df_cloud_2$out <- out
for(i in 1:nrow(df_cloud_2)){
if((df_cloud_2$x[i] > mean_1293_2*
(2+0.65)/(2-0.65)) |
(df_cloud_2$x[i] < mean_1293_2*
(2-0.65)/(2+0.65)) |
(df_cloud_2$y[i] > mean_1260_2*
(2+0.43)/(2-0.43)) |
(df_cloud_2$y[i] < mean_1260_2*
(2-0.43)/(2+0.43))) {
df_cloud_2$out[i] <- 1 # out of bounds
}
}
df_cloud_3$out <- out
count_this <- 0
for(i in 1:nrow(df_cloud_3)){
if(df_cloud_3$x[i] > mean_1293_3*
(2+0.65)/(2-0.65)){
df_cloud_3$out[i] <- 1 # out of bounds
count_this <- count_this+1
}
if(df_cloud_3$x[i] < mean_1293_3*
(2-0.65)/(2+0.65)){
df_cloud_3$out[i] <- 1 # out of bounds
count_this <- count_this+1
}
if(df_cloud_3$y[i] > mean_1260_3*
(2+0.43)/(2-0.43)){
df_cloud_3$out[i] <- 1 # out of bounds
count_this <- count_this+1
}
if(df_cloud_3$y[i] < mean_1260_3*
(2-0.43)/(2+0.43)) {
df_cloud_3$out[i] <- 1 # out of bounds
count_this <- count_this+1
}
}
colrs <- c("0"="slategrey", "1"="red")
p_cloud_1 <- ggplot(df_cloud_1, aes(x=x, y=y)) +
geom_point(aes(color = as.factor(out)),alpha=0.6)
p_cloud_1 + theme_classic() +
scale_colour_manual(values = colrs) +
theme(legend.position = "none") + labs(x=("C1293"),
y=("C1260"))p_fig3_plus <- p_fig3 + geom_point(data=df_cloud_1,
aes(x=x, y=y, color=as.factor(out)),
alpha=0.1) +
geom_point(data=df_cloud_2,
aes(x=x, y=y, color=as.factor(out)),
alpha=0.1) +
geom_point(data=df_cloud_3,
aes(x=x, y=y, color=as.factor(out)),
alpha=0.1)
p_fig3_plus + theme_classic() + geom_point(data=df_ex,
aes(x=x_ex, y=y_ex), size=3) +
scale_colour_manual(values = colrs) +
theme(legend.position = "none") + xlim(0, 0.75) + labs(x=("C1293"), y=("C1260"))# color=as.factor(out)