set.seed(80938)
################################################################
# Example: Simulation of rolling a dice
#
# Make a random draw from (1,2,3,4,5,6) with equal probability
# for each outcome
sample(1:6, size=1)


set.seed(9783)

# Simulate a fair dice

# Number of simulated realizations
n <- 30
# Draw independently from the set (1,2,3,4,5,6) with equal probability
xFair <- sample(1:6, size=n, replace=TRUE)
# Count the number of each outcome using the table function
table(xFair)
# Plot the pdf
par(mfrow=c(1,2))
plot(rep(1/6,6), type="h", col="red", ylim=c(0,1), lwd=10)
# Plot the empirical pdf
lines(table(xFair)/n, lwd=4)
# Plot the cdf
plot(cumsum(rep(1/6,6)), ylim=c(0,1), lwd=10, type="h", col="red")
# Add the empirical cdf
lines(cumsum(table(xFair)/n), lwd=4, type="h")


# Simulate an unfair dice

# Number of simulated realizations
n <- 30
# Draw independently from the set (1,2,3,4,5,6) with higher
# probability for a six
xUnfair <- sample(1:6, size=n, replace=TRUE, prob=c(rep(1/7,5),2/7))
# Plot the pdf
plot(c(rep(1/7,5),2/7), type="h", col="red", ylim=c(0,1), lwd=10)
# Plot the empirical density function
lines(table(xUnfair)/n, lwd=4)
# Plot the cdf
plot(cumsum(c(rep(1/7,5),2/7)), ylim=c(0,1), lwd=10, type="h", col="red")
# Add the empirical cdf
lines(cumsum(table(xUnfair)/n), lwd=4, type="h")


set.seed(87)


################################################################
# Random numbers and seed in R
#
# The random numbers generated depends on the seed

# Set the seed
set.seed(127)
# Generate a (pseudo) random sequence
sample(1:10)
# Generate again and see that new numbers are generated
sample(1:10)
# Set the seed and the same numbers as before just after the 
# seed was set are generated
set.seed(127)
sample(1:10)


set.seed(782)


################################################################
# Example: Simulate and estimate the mean
##
# Simulate a fair dice

# Number of realizations
n <- 30
# Simulate rolls with a fair dice
xFair <- sample(1:6, size=n, replace=TRUE)
# Calculate the sample mean
sum(xFair)/n
# or
mean(xFair)


set.seed(672)    

# Simulate an unfair dice

# n realizations
xUnfair <- sample(1:6, size=n, replace=TRUE, prob=c(rep(1/7,5),2/7))
# Calculate the sample mean
mean(xUnfair)


set.seed(76241)    


################################################################
# Example: Simulate and estimate the sample variance
##
# Simulate a fair dice and calculate the sample variance

# Number of realizations
n <- 30
# Simulate
xFair <- sample(1:6, size=n, replace=TRUE)
# Calculate the distance for each sample to the sample mean
distances <- xFair - mean(xFair)
# Calculate the average of the squared distances
sum(distances^2)/(n-1)
# Or use the built in function
var(xFair)


# Plot the pdf of the six-sided dice and the four-sided dice
plot(rep(1/6,6), type="h", col="red")
plot(rep(1/4,4), type="h", col="blue")


# Calculate the means and variances of the dices

# The means
muXSixsided <- sum((1:6)*1/6)  # Six-sided
muXFoursided <- sum((1:4)*1/4)  # Four-sided
# The variances
sum((1:6-muXSixsided)^2*1/6)
sum((1:4-muXFoursided)^2*1/4)


################################################################
# Example: simulation with a binomial distribution
##
# Simulate a binomial distributed experiment

# Number of flips
nFlips <- 10
# The possible outcomes are (0,1,...,nFlips)
xSeq <- 0:nFlips
# Use the dbinom() function which returns the pdf, see ?dbinom
pdfSeq <- dbinom(xSeq, size=nFlips, prob=1/2)
# Plot the density
plot(xSeq, pdfSeq, type="h")


set.seed(572)


################################################################
# Example: Simulate 30 successive dice rolls
#  
# Simulate 30 successive dice rolls
Xfair <- sample(1:6, size=30, replace=TRUE)
# Count the number sixes obtained
sum(Xfair==6)
# This is equivalent to
rbinom(1, size=30, prob=1/6)


################################################################
# Example: Lottery probabilities using the hypergeometric distribution
##
# The probability of getting x numbers of the sheet in 25 drawings

# Number of successes in the population
a <- 8
# Size of the population
N <- 90
# Number of draws
n <- 25
# Plot the pdf, note: parameters names are different in the R function
plot(0:8, dhyper(x=0:8,m=a,n=N-a,k=n), type="h")


################################################################
# Example: Poisson rate scaling
##
# Probability of no goals in 10 minutes

# The Poisson pdf
dpois(x=0, lambda=3.4/9)


################################################################
# Example: Poisson distributed random variable
##
# Simulate a Poisson random variable

# The mean rate of events per interval
lambda <- 4
# Number of realizations
n <- 1000
# Simulate
x <- rpois(n, lambda)
# Plot the empirical pdf
plot(table(x)/n)
# Add the pdf to the plot
lines(0:20, dpois(0:20,lambda), type="h", col="red")


################################################################
# Example: The normal pdf
##
# Play with the normal distribution
  
# The mean and standard deviation
muX <- 0
sigmaX <- 1
# A sequence of x values
xSeq <- seq(-6, 6, by=0.1)
##
pdfX <- 1/(sigmaX*sqrt(2*pi)) * exp(-(xSeq-muX)^2/(2*sigmaX^2))
# Plot the pdf
plot(xSeq, pdfX, type="l", xlab="$x$", ylab="f(x)")


set.seed(5262)


################################################################
# Example: R functions for the normal distribution
#  
# Do it for a sequence of x values
xSeq <- c(-3,-2,1,0,1,2,3)
# The pdf
dnorm(xSeq, mean=0, sd=1)
# The cdf
pnorm(xSeq, mean=0, sd=1)
# The quantiles
qnorm(c(0.01,0.025,0.05,0.5,0.95,0.975,0.99), mean=0, sd=1)
# Generate random normal distributed realizations
rnorm(n=10, mean=0, sd=1)
# Calculate the probability that the outcome of X is between a and b
a <- 0.2
b <- 0.8
pnorm(b) - pnorm(a)
# See more details by running "?dnorm"


################################################################
# Example: Exponential distributed time intervals
#
# Simulate exponential waiting times

# The rate parameter: events per time
lambda <- 4
# Number of realizations
n <- 1000
# Simulate
x <- rexp(n, lambda)
# The empirical pdf
hist(x, probability=TRUE)
# Add the pdf to the plot
curve(dexp(xseq,lambda), xname="xseq", add=TRUE, col="red")


# Check the relation to the Poisson distribution
# by counting the events in each interval

# Sum up to get the running time
xCum <- cumsum(x)
# Use the hist function to count in intervals between the breaks,
# here 0,1,2,...
tmp <- hist(xCum, breaks=0:ceiling(max(xCum)))
# Plot the discrete empirical pdf
plot(table(tmp$counts)/length(tmp$counts))
# Add the Poisson pdf to the plot
lines(0:20, dpois(0:20,lambda), type="h", col="red")


set.seed(783773)


################################################################
# Example: Random numbers in R
#
# Generate 100 normal distributed values
rnorm(100, mean=2, sd=3)
# Similarly, generate 100 uniform distributed values from 0 to 1 and 
# put them through the inverse normal cdf
qnorm(runif(100), mean=2, sd=3)


set.seed(333)  


################################################################
# Example: Simulating the exponential distribution
#
# Three equivalent ways of simulating the exponential distribution
# with lambda=1/2
re1 <- -2*log(1-runif(10000))

re2 <- qexp(runif(10000), 1/2)

re3 <- rexp(10000, 1/2)

# Check the means and variances of each
c(mean(re1), mean(re2), mean(re3)) 

c(var(re1), var(re2), var(re3)) 


################################################################
# Simulation of chi^2-distribution
#
# Simulate 10 realizations from a standard normal distributed variable
n <- 10
rnorm(n)
# Now repeat this 200 times and calculate the sum of squares each time
# Note: the use of the function replicate: it repeats the
#       expression in the 2nd argument k times, see ?replicate
k <- 200
x <- replicate(k, sum(rnorm(n)^2))
# Plot the epdf of the sums and compare to the theoretical chisquare pdf
par(mfrow=c(1,2))
hist(x, freq=FALSE)
curve(dchisq(xseq,df=n), xname="xseq", add=TRUE, col="red")
# and the ecdf compared to the cdf
plot(ecdf(x))
curve(pchisq(xseq,df=n), xname="xseq", add=TRUE, col="red")


################################################################
# Example: Milk dose machines
#
# Chi-square milk dosing precision

# The sample size
n <- 20
# The claimed deviation
sigma <- 0.02
# The observed sample standard deviation
s <- 0.03
# Calculate the chi-square statistic
chiSq <- (n-1)*s^2 / sigma^2
# Use the cdf to calculate the probability of getting the observed 
# sample standard deviation or higher
1 - pchisq(chiSq, df=n-1)


################################################################
# Example: Relation between normal and chi^2
#
# Set simulate parameters
nu <- 8; k <- 200
# Generate the simulated realizations
z <- rnorm(k)
y <- rchisq(k, df=nu)
x <- z/sqrt(y/nu)
# Plot
par(mfrow=c(1,2))
hist(x, freq = FALSE)
curve(dt(xseq, df = nu), xname="xseq", add=TRUE, col="red")
plot(ecdf(x))
curve(pt(xseq, df = nu), xname="xseq", add=TRUE, col="red")


################################################################
# Example: Simulation of t-distribution
#
# Simulate
n <- 8; k <- 200; mu <- 1; sigma <- 2
# Repeat k times the simulation of a normal dist. sample:
# return the values in a (n x k) matrix
x <- replicate(k, rnorm(n, mean=mu, sd=sigma))
xbar <- apply(x, 2, mean)
s <- apply(x, 2, sd)
tobs <- (xbar - mu)/(s/sqrt(n))
# Plot
par(mfrow=c(1,2))
hist(tobs, freq = FALSE)
curve(dt(xseq, df=n-1), xname="xseq", add=TRUE, col="red")
plot(ecdf(tobs))
curve(pt(xseq, df=n-1), xname="xseq", add=TRUE, col="red")


################################################################
# Example: Electric car driving distance
#
# Calculate the probability of getting the sample mean under the 
# conditions that the claim is actually the real mean

# A test of 10 cars was carried out
n <- 10
# The claim is that the real mean is 400 km
muX <- 400
# From the sample the sample mean was calculated to
xMean <- 393
# And the sample deviation was
xSD <- 14
# Use the cdf to calculate the probability of obtaining this 
# sample mean or a lower value
pt( (xMean-muX) / (xSD/sqrt(n)), df=n-1)


################################################################
# Example: t-distribution
#
# Plot the t-distribution for different sample sizes

# First plot the standard normal distribution
curve(dnorm(x), xlim=c(-5,5), xlab="x", ylab="Density")
# Add the t-distribution for 30 observations
curve(dt(x,df=30-1), add=TRUE, col=2)
# Add the t-distribution for 15, 5 and 2 observations
curve(dt(x,df=15-1), add=TRUE, col=3)
curve(dt(x,df=5-1), add=TRUE, col=4)
curve(dt(x,df=2-1), add=TRUE, col=5)
# Add a legend
legend("topright", c("Norm.",paste("n=",c(30,15,5,2))), lty=1, col=1:6, cex=0.8)


################################################################
# Example: F-distribution
#
# Simulate
nu1 <- 8; nu2 <- 10; k <- 200
u <- rchisq(k, df=nu1)
v <- rchisq(k, df=nu2)
fobs <- (u/nu1) / (v/nu2)
# Plot
par(mfrow=c(1,2))
hist(fobs, freq = FALSE)
curve(df(x, df1=nu1, df2=nu2), add=TRUE, col="red")
plot(ecdf(fobs))
curve(pf(x, df1=nu1, df2=nu2), add=TRUE, col="red")


################################################################
# Example: Relation between normal and F-distribution
#
# Simulate
n1 <- 8; n2 <- 10; k <- 200
mu1 <- 2; mu2 <- -1
sigma1 <- 2; sigma2 <- 4
s1 <- replicate(k, sd(rnorm(n1, mean=mu1, sd=sigma1)))
s2 <- replicate(k, sd(rnorm(n2, mean=mu2, sd=sigma2)))
fobs <- (s1^2 / sigma1^2) / (s2^2 / sigma2^2)
# Plot
par(mfrow=c(1,2))
hist(fobs, freq=FALSE)
curve(df(xseq, df1=n1-1, df2=n2-1), xname="xseq", add=TRUE, col="red")
plot(ecdf(fobs))
curve(pf(xseq, df1=n1-1, df2=n2-1), xname="xseq", add=TRUE, col="red")