################################################################ # Example: Normal and t probabilities and quantiles # # The P(T>1.96) probability for n=10 1 - pt(1.96, df = 9) # The P(Z>1.96) probability 1 - pnorm(1.96) # The P(T>1.96) probability for n-values, 10, 20, ... ,50 1 - pt(1.96, df = seq(9, 49, by = 10)) # The P(T>1.96) probability for n-values, 100, 200, ... ,500 1 - pt(1.96, df = seq(99, 499, by = 100)) # The standard normal 0.975% quantile qnorm(0.975) # The t-quantiles for n-values: 10, 20, ... ,50 # (rounded to 3 decimal points) qt(0.975, df = seq(9, 49, by = 10)) # The t-quantiles for n-values: 100, 200, ... ,500 # (rounded to 3 decimal points) qt(0.975, df = seq(99, 499, by = 100)) ################################################################ # Example: Student heights # # The t-quantiles for n=10: qt(0.975, 9) ################################################################ # Example: Student heights # # The 99% confidence interval for the mean x <- c(168, 161, 167, 179, 184, 166, 198, 187, 191, 179) n <- length(x) mean(x) - qt(0.995, df = 9) * sd(x) / sqrt(n) mean(x) + qt(0.995, df = 9) * sd(x) / sqrt(n) # The 99% confidence interval for the mean t.test(x, conf.level = 0.99) set.seed(1234) ################################################################ # Example: Central Limit Theorem in practice # par(mfrow=c(2,2)) xlim <- c(0,1) # Number of simulated samples k <- 1000 # Number of observations in each sample n <- 1 # Simulate k samples with n observations # Note, the use of replicate: it repeats the second argument (here k times) Xbar <- replicate(k, runif(n)) hist(Xbar, col="blue", main="n=1", xlab="Sample means", xlim=xlim) # Increase the number of observations in each sample # Note, the use of apply here: it takes the mean on the 2nd dimension # (i.e. column) of the matrix returned by replicate n <- 2 Xbar <- apply(replicate(k, runif(n)), 2, mean) hist(Xbar, col="blue", main="n=2", xlab="Sample means", xlim=xlim) # Increase the number of observations in each sample n <- 6 Xbar <- apply(replicate(k, runif(n)), 2, mean) hist(Xbar, col="blue", main="n=6", xlab="Sample means", xlim=xlim) # Increase the number of observations in each sample n <- 30 Xbar <- apply(replicate(k, runif(n)), 2, mean) hist(Xbar, col="blue", main="n=30", xlab="Sample means", xlim=xlim) # We set a seed to be able get the same results set.seed(12345.6789) ################################################################ # Example: Simulating many confidence intervals # # Simulate 1000 samples of n=50 observations, and # calculate a CI from each sample k <- 1000 ThousandCIs <- replicate(k, t.test(rnorm(n=50, mean=1, sd=1))$conf.int) # Count how often 1 is covered sum(ThousandCIs[1,] < 1 & 1 < ThousandCIs[2,]) ################################################################ # Example: The chi-square-distribution # # The chisquare-distribution with df=9 (the density) x <- seq(0, 35, by = 0.1) plot(x, dchisq(x, df = 9), type = "l", ylab="Density") # Reading the data into R: xA <- c(.7,-1.6,-.2,-1.2,-1,3.4,3.7,.8,0,2) xB <- c(1.9,.8,1.1,.1,-.1,4.4,5.5,1.6,4.6,3.4) dif <- xB-xA dif t.test(dif) ################################################################ # Example: Sleeping medicine # # Enter sleep difference observations x <- c(1.2, 2.4, 1.3, 1.3, 0.9, 1.0, 1.8, 0.8, 4.6, 1.4) n <- length(x) # Compute the tobs - the observed test statistic tobs <- (mean(x) - 0) / (sd(x) / sqrt(n)) tobs # Compute the p-value as a tail-probability in the t-distribution pvalue <- 2 * (1-pt(abs(tobs), df=n-1)) pvalue t.test(x) ################################################################ # Example: Student heights # # The height sample x <- c(168,161,167,179,184,166,198,187,191,179) # Using histograms par(mfrow=c(1,3), mar=c(4,3,1,1)) hist(x, xlab="Height", main="") hist(x, xlab="Height", main="", breaks=8) hist(x, xlab="Height", main="", breaks=2) # Plot the empirical cdf plot(ecdf(x), verticals = TRUE) # Plot the best normal cdf xseq <- seq(0.9*min(x), 1.1*max(x), length.out = 100) lines(xseq, pnorm(xseq, mean(x), sd(x))) # The expected quantiles in a 0 to 1 uniform distribution n <- length(x) # They have equal distance pseq <- (1:n-0.5)/n # Plot the expected normal distribution quantiles plot(x=qnorm(p=pseq), y=sort(x), xlab="Normal quantiles", ylab="Sample quantiles") # Mark the 1st and 3rd quantiles with crosses points(x=qnorm(p=c(0.25,0.75)), y=quantile(x,probs=c(0.25,0.75)), pch=3, col="red") # Add a straight line through the 1st and 3rd quantiles qqline(x) set.seed(89473) # Simulate 100 normal distributed observations xsim <- rnorm(100, mean(x), sd(x)) # Do the q-q normal plot with inbuilt functions qqnorm(xsim) qqline(xsim) set.seed(98) # Do the Wally plot # To install the MESS package (run only once) #install.packages("MESS") # Load the MESS package library(MESS) # Define the plotting function qqwrap <- function(x, y, ...){ stdy <- (y-mean(y))/sd(y) qqnorm(stdy, main="", ...) qqline(stdy)} # Do the Wally plot wallyplot(x-mean(x), FUN=qqwrap, ylim=c(-3,3)) ################################################################ # Example: Radon in houses # # Reading in the sample radon <- c(2.4, 4.2, 1.8, 2.5, 5.4, 2.2, 4.0, 1.1, 1.5, 5.4, 6.3, 1.9, 1.7, 1.1, 6.6, 3.1, 2.3, 1.4, 2.9, 2.9) # A histrogram and q-q plot par(mfrow = c(1,2)) hist(radon) qqnorm(radon, ylab = "Sample quantiles", xlab = "Normal quantiles") qqline(radon) # Transform using the natural logarithm logRadon <- log(radon) par(mfrow = c(1,2)) hist(logRadon) qqnorm(logRadon, ylab = "Sample quantiles", xlab = "Normal quantiles") qqline(logRadon) # A confidence interval and t-test t.test(logRadon, conf.level=0.95) # Back transform to original scale, now we get the median! exp(0.9644) # And the confidence interval on the original scale exp(c(0.7054, 1.2234)) ################################################################ # Example: Nutrition study # # Load the two samples xA <- c(7.53, 7.48, 8.08, 8.09, 10.15, 8.4, 10.88, 6.13, 7.9) xB <- c(9.21, 11.51, 12.79, 11.85, 9.97, 8.79, 9.69, 9.68, 9.19) # Summary statistics c(mean(xA), mean(xB)) c(var(xA), var(xB)) c(length(xA), length(xB)) t.test(xB,xA) # Check computations with 3 decimal points: ms=round(c(mean(xA), mean(xB)),3) vs=round(c(var(xA), var(xB)),3) ns=c(length(xA), length(xB)) (ms[2]-ms[1])/sqrt(vs[1]/ns[1]+vs[2]/ns[2]) nu=((vs[1]/ns[1]+vs[2]/ns[2])^2)/((vs[1]/ns[1])^2/(ns[1]-1)+(vs[2]/ns[2])^2/(ns[2]-1)) nu # Keep the summary statistics ms <- c(mean(xA), mean(xB)) vs <- c(var(xA), var(xB)) ns <- c(length(xA), length(xB)) # The observed statistic t_obs <- (ms[2]-ms[1])/sqrt(vs[1]/ns[1]+vs[2]/ns[2]) # The degrees of freedom nu <- ((vs[1]/ns[1]+vs[2]/ns[2])^2)/ ((vs[1]/ns[1])^2/(ns[1]-1)+(vs[2]/ns[2])^2/(ns[2]-1)) # Print the result t_obs nu # The probability of observed greater that t_obs 1 - pt(t_obs, df = nu) # Use the built in function for t-test t.test(xB, xA) # The critical values for the test qt(0.975, df = 15.99) ################################################################ # Example: Overlapping confidence intervals? # # The confidence intervals and joining the lower and upper limits CIA <- t.test(xA)$conf.int CIB <- t.test(xB)$conf.int lower <- c(CIA[1], CIB[1]) upper <- c(CIA[2], CIB[2]) # The barplot of means WITH CIs added using gplots-package # First install the package with: install.packages("gplots") library(gplots) barplot2(c(mean(xA),mean(xB)), plot.ci=TRUE, ci.l=lower, ci.u=upper, col = 2:3) # This chunk makes the plot, since gplots is not working on the compute server CIA <- t.test(xA)$conf.int CIB <- t.test(xB)$conf.int lower <- c(CIA[1], CIB[1]) upper <- c(CIA[2], CIB[2]) # barplot(c(mean(xA),mean(xB)), col = 2:3, xlim=c(0,2.5), ylim=c(0,11.5)) arrows(0.7,lower[1],0.7,lower[2], code=3, angle=90, length=0.3) arrows(1.4+0.5,upper[1],1.4+0.5,upper[2], code=3, angle=90, length=0.3) # The confidence intervals CIA CIB ################################################################ # Example: Sleeping medicine # # Read the samples x1 <- c(.7,-1.6,-.2,-1.2,-1,3.4,3.7,.8,0,2) x2 <- c(1.9,.8,1.1,.1,-.1,4.4,5.5,1.6,4.6,3.4) # Take the differences dif <- x2 - x1 # t-test on the differences t.test(dif) # Give both samples, but make paired t-test t.test(x2, x1, paired = TRUE) # WRONG analysis t.test(x1, x2) # The sample variances of each sample and of the differences var(x1) var(x2) var(x1-x2) # The sample size for power=0.80 power.t.test(power=0.8, delta=4, sd=12.21, sig.level=0.05, type="one.sample") # The power for n=50 power.t.test(n=50, delta=4, sd=12.21, sig.level=0.05, type="one.sample") # The detectable effect size for n=50 and power=0.80 power.t.test(n=50, power=0.80, sd=12.21, sig.level=0.05, type="one.sample") ################################################################ # Example: Two-sample power and sample size computations in R # # Finding the power of detecting a group difference of 2 # with sigma=1 for n=10 power.t.test(n=10, delta=2, sd=1, sig.level=0.05) # Finding the sample size for detecting a group difference of 2 # with sigma=1 and power=0.9 power.t.test(power=0.90, delta=2, sd=1, sig.level=0.05) # Finding the detectable effect size (delta) # with sigma=1, n=10 and power=0.9 power.t.test(power=0.90, n=10, sd=1, sig.level=0.05) par(mfrow = c(3, 3)) par(cex=0.8) for (i in 1:9){ xr <- rnorm(9) qqnorm(xr, main="") qqline(xr) }