Probability Distributions in R

This page is a loose collection of code snippets that illustrate how R can be used to work on different aspects of probability distributions.

Published

March 2, 2024

Note

Some of the scripts are copied from this website

Example 1

Display the Student’s t distributions with various degrees of freedom and compare them to the normal distribution

x <- seq(-4, 4, length=100)
hx <- dnorm(x)

degf <- c(1, 3, 8, 30)
colors <- c("red", "blue", "darkgreen", "gold", "black")
labels <- c("df=1", "df=3", "df=8", "df=30", "normal")

plot(x, hx, type="l", lty=2, xlab="x value",
     ylab="Density", main="Comparison of t Distributions")

for (i in 1:4){
  lines(x, dt(x,degf[i]), lwd=2, col=colors[i])
}

legend("topright", inset=.05, title="Distributions",
       labels, lwd=2, lty=c(1, 1, 1, 1, 2), col=colors)

Example 2

Children’s IQ scores are normally distributed with a mean of 100 and a standard deviation of 15. What proportion of children are expected to have an IQ between 80 and 120?

mean=100; sd=15
lb=80; ub=120

x <- seq(-4,4,length=100)*sd + mean
hx <- dnorm(x,mean,sd)

plot(x, hx, type="n", xlab="IQ Values", ylab="",
     main="Normal Distribution", axes=FALSE)

i <- x >= lb & x <= ub
lines(x, hx)
polygon(c(lb,x[i],ub), c(0,hx[i],0), col="red") 

area <- pnorm(ub, mean, sd) - pnorm(lb, mean, sd)
result <- paste("P(",lb,"< IQ <",ub,") =",
                signif(area, digits=3))
mtext(result,3)
axis(1, at=seq(40, 160, 20), pos=0)

Example 3

Cumulative Sum

m<-c(20:40) # egg cluster sizes to be examined
pm<-c(rep(0,6),1/30*((26:30)-25),(36-((31:35)))/30,rep(0,5)) # PMF
cm<-cumsum(pm) # CDF

barplot(pm, xlab="Cluster Size",, ylab="Probability",names.arg=m, main="CDF")

barplot(cm, xlab="Cluster Size",, ylab="Probability",names.arg=m, main="CDF")

cmEasy<-cumsum(m)
m

 [1] 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40

cmEasy

 [1]  20  41  63  86 110 135 161 188 216 245 275 306 338 371 405 440 476 513 551
[20] 590 630

barplot(cmEasy, xlab="Cluster Size")

Example 4

According to a study 86% of the countries households own a cellular phone (one or more). In a random sample of 20 households, what is the probability that exactly 15 own a cell phone?

dbinom(15,20,0.86)

[1] 0.08680819

Example 5

In a random sample of 20 households, what is the probability that the number of households owning a cell phone is between 15 and 17?

dbinom(15,20,0.86)+dbinom(16,20,0.86)+dbinom(17,20,0.86)

[1] 0.4943078

pbinom(17, size=20, prob=0.86)-pbinom(14, size=20, prob=0.86)

[1] 0.4943078

Example 6

1 - Construct a binomial probability histogram with \(n=10\) and \(p=0,2\). 2 - Construct a binomial probability histogram with \(n=10\) and \(p=0,5\). 3 - Construct a binomial probability histogram with \(n=10\) and \(p=0,8\). Comment on the shape of the distribution.

First of all, we define a function for plotting a Binomial probability.

hist.binom <- function(n, p, ...)
{
    # plots a Binomial probability  distribution
    k <- 0:n
    p <- dbinom(k, n, p)
    names(p) <- as.character(0:n)
    barplot(p,space=0, ...)
}

To use the hist.binom() function, you must specify the values of n and p.

hist.binom <- function(n, p, ...)
{
    # plots a Binomial probability  distribution
    k <- 0:n
    p <- dbinom(k, n, p)
    names(p) <- as.character(0:n)
    barplot(p,space=0, ...)
}

For example, to get the histogram of a binomial \((6,1/3)\) distribution, use hist.binom(6,1/3).

Try experimenting with the col (color) argument; use help(colors) or colors() to see what colors R knows by name.

hist.binom <- function(n, p, ...)
{
    # plots a Binomial probability  distribution
    k <- 0:n
    p <- dbinom(k, n, p)
    names(p) <- as.character(0:n)
    barplot(p,space=0, ...)
}

An alternative way to plot a Binomial probability distribution

n<-300
k <- 0:n
probs <- dbinom(k, n, 0.2)
names(probs) <- as.character(0:n)
probs

            0             1             2             3             4 
 8.452712e-30  6.339534e-28  2.369401e-26  5.884012e-25  1.092220e-23 
            5             6             7             8             9 
 1.616485e-22  1.986930e-21  2.086276e-20  1.910247e-19  1.549422e-18 
           10            11            12            13            14 
 1.127205e-17  7.429304e-17  4.473060e-16  2.477387e-15  1.269661e-14 
           15            16            17            18            19 
 6.052051e-14  2.695054e-13  1.125581e-12  4.424160e-12  1.641596e-11 
           20            21            22            23            24 
 5.766106e-11  1.922035e-10  6.093726e-10  1.841365e-09  5.313105e-09 
           25            26            27            28            29 
 1.466417e-08  3.877545e-08  9.837476e-08  2.397885e-07  5.622626e-07 
           30            31            32            33            34 
 1.269776e-06  2.764836e-06  5.810475e-06  1.179703e-05  2.316034e-05 
           35            36            37            38            39 
 4.400464e-05  8.098076e-05  1.444522e-04  2.499403e-04  4.197715e-04 
           40            41            42            43            44 
 6.847522e-04  1.085583e-03  1.673607e-03  2.510410e-03  3.665769e-03 
           45            46            47            48            49 
 5.213539e-03  7.225284e-03  9.761821e-03  1.286323e-02  1.653844e-02 
           50            51            52            53            54 
 2.075574e-02  2.543596e-02  3.044978e-02  3.562050e-02  4.073270e-02 
           55            56            57            58            59 
 4.554656e-02  4.981655e-02  5.331245e-02  5.584020e-02  5.725986e-02 
           60            61            62            63            64 
 5.749845e-02  5.655585e-02  5.450342e-02  5.147545e-02  4.765501e-02 
           65            66            67            68            69 
 4.325608e-02  3.850447e-02  3.361957e-02  2.879912e-02  2.420796e-02 
           70            71            72            73            74 
 1.997156e-02  1.617415e-02  1.286070e-02  1.004192e-02  7.701063e-03 
           75            76            77            78            79 
 5.801468e-03  4.293849e-03  3.122800e-03  2.232001e-03  1.568051e-03 
           80            81            82            83            84 
 1.082935e-03  7.353265e-04  4.909650e-04  3.223806e-04  2.082041e-04 
           85            86            87            88            89 
 1.322709e-04  8.266929e-05  5.083686e-05  3.076208e-05  1.831899e-05 
           90            91            92            93            94 
 1.073696e-05  6.194403e-06  3.518017e-06  1.967063e-06  1.082931e-06 
           95            96            97            98            99 
 5.870626e-07  3.134058e-07  1.647804e-07  8.533270e-08  4.352829e-08 
          100           101           102           103           104 
 2.187297e-08  1.082820e-08  5.281402e-09  2.538150e-09  1.201960e-09 
          105           106           107           108           109 
 5.609148e-10  2.579679e-10  1.169294e-10  5.223928e-11  2.300445e-11 
          110           111           112           113           114 
 9.986024e-12  4.273299e-12  1.802798e-12  7.498363e-13  3.074987e-13 
          115           116           117           118           119 
 1.243364e-13  4.957378e-14  1.949055e-14  7.556717e-15  2.889333e-15 
          120           121           122           123           124 
 1.089519e-15  4.051931e-16  1.486262e-16  5.377125e-17  1.918853e-17 
          125           126           127           128           129 
 6.754363e-18  2.345265e-18  8.032994e-19  2.714273e-19  9.047578e-20 
          130           131           132           133           134 
 2.975261e-20  9.652565e-21  3.089552e-21  9.756481e-22  3.039799e-22 
          135           136           137           138           139 
 9.344567e-23  2.834290e-23  8.482181e-24  2.504702e-24  7.297873e-25 
          140           141           142           143           144 
 2.098138e-25  5.952166e-26  1.666187e-26  4.602405e-27  1.254475e-27 
          145           146           147           148           149 
 3.374105e-28  8.955245e-29  2.345421e-29  6.061646e-30  1.545923e-30 
          150           151           152           153           154 
 3.890573e-31  9.662020e-32  2.367831e-32  5.726126e-33  1.366462e-33 
          155           156           157           158           159 
 3.217798e-34  7.477254e-35  1.714530e-35  3.879394e-36  8.661541e-37 
          160           161           162           163           164 
 1.908246e-37  4.148360e-38  8.898489e-39  1.883422e-39  3.933367e-40 
          165           166           167           168           169 
 8.105120e-41  1.647878e-41  3.305624e-42  6.542381e-43  1.277506e-43 
          170           171           172           173           174 
 2.461078e-44  4.677488e-45  8.770291e-46  1.622250e-46  2.960141e-47 
          175           176           177           178           179 
 5.328253e-48  9.460677e-49  1.656955e-49  2.862436e-50  4.877335e-51 
          180           181           182           183           184 
 8.196632e-52  1.358558e-52  2.220719e-53  3.579848e-54  5.690791e-55 
          185           186           187           188           189 
 8.920700e-56  1.378872e-56  2.101489e-57  3.157822e-58  4.678255e-59 
          190           191           192           193           194 
 6.832715e-60  9.837679e-61  1.396233e-61  1.953279e-62  2.693311e-63 
          195           196           197           198           199 
 3.660140e-64  4.901973e-65  6.469609e-66  8.413759e-67  1.078145e-67 
          200           201           202           203           204 
 1.361158e-68  1.692983e-69  2.074323e-70  2.503493e-71  2.975966e-72 
          205           206           207           208           209 
 3.484058e-73  4.016814e-74  4.560152e-75  5.097285e-76  5.609452e-77 
          210           211           212           213           214 
 6.076907e-78  6.480114e-79  6.801063e-80  7.024572e-81  7.139460e-82 
          215           216           217           218           219 
 7.139460e-83  7.023774e-84  6.797201e-85  6.469812e-86  6.056217e-87 
          220           221           222           223           224 
 5.574473e-88  5.044772e-89  4.488029e-90  3.924509e-91  3.372625e-92 
          225           226           227           228           229 
 2.847995e-93  2.362827e-94  1.925652e-95  1.541366e-96  1.211554e-97 
          230           231           232           233           234 
 9.350040e-99 7.083363e-100 5.266725e-101 3.842675e-102 2.750633e-103 
          235           236           237           238           239 
1.931295e-104 1.329811e-105 8.977629e-107 5.941078e-108 3.853001e-109 
          240           241           242           243           244 
2.448261e-110 1.523814e-111 9.287708e-113 5.542048e-114 3.236647e-115 
          245           246           247           248           249 
1.849512e-116 1.033772e-117 5.650172e-119 3.018741e-120 1.576050e-121 
          250           251           252           253           254 
8.037853e-123 4.002915e-124 1.945861e-125 9.229382e-127 4.269497e-128 
          255           256           257           258           259 
1.925460e-129 8.461492e-131 3.621650e-132 1.509021e-133 6.117653e-135 
          260           261           262           263           264 
2.411767e-136 9.240487e-138 3.438731e-139 1.242127e-140 4.352150e-142 
          265           266           267           268           269 
1.478089e-143 4.862133e-145 1.547870e-146 4.764899e-148 1.417070e-149 
          270           271           272           273           274 
4.067517e-151 1.125696e-152 3.000478e-154 7.693533e-156 1.895305e-157 
          275           276           277           278           279 
4.479811e-159 1.014450e-160 2.197364e-162 4.544909e-164 8.959497e-166 
          280           281           282           283           284 
1.679906e-167 2.989156e-169 5.034926e-171 8.006066e-173 1.198091e-174 
          285           286           287           288           289 
1.681531e-176 2.204805e-178 2.688786e-180 3.034221e-182 3.149710e-184 
          290           291           292           293           294 
2.986794e-186 2.565974e-188 1.977206e-190 1.349629e-192 8.033504e-195 
          295           296           297           298           299 
4.084833e-197 1.725014e-199 5.808127e-202 1.461777e-204 2.444443e-207 
          300 
2.037036e-210

plot(k,probs, type="h", ylim=c(0,max(probs)))

plot(k,probs, type="s", ylim=c(0,max(probs)))

plot(k,probs, type="h", ylim=c(0,max(probs)), xlim=c(40,80))

Example 7

For a fixed \(p\), as the number of trials in a binominal experiment increases, the probability distribution of the random variable X becomes bell shaped.

hist.binom <- function(n, p, ...)
{
    # plots a Binomial probability  distribution
    k <- 0:n
    p <- dbinom(k, n, p)
    names(p) <- as.character(0:n)
    barplot(p,space=0, ...)
}

Example 8

According to a study 86% of the countries households own a cellular phone (one or more). In a simple random sample of 300 households, 275 owned at least one cell phone. Is this result unusual?

n <- 300
p <- 0.86

dbinom(275,300,0.86)

[1] 0.0008531712

The answer cannot be found by simply calculating the probability for a particular value to occur! Examine the probabilities of all possible outcomes:

sampleSpace <- c(0,seq(1:300))
sampleSpaceProbs <- dbinom(sampleSpace,300,0.86)
sampleSpaceProbs

  [1] 6.893038e-257 1.270289e-253 1.166579e-250 7.118351e-248 3.246731e-245
  [6] 1.180697e-242 3.565986e-240 9.200243e-238 2.069890e-235 4.125324e-233
 [11] 7.374312e-231 1.194255e-228 1.766787e-226 2.404384e-224 3.027806e-222
 [16] 3.546282e-220 3.880329e-218 3.982065e-216 3.845853e-214 3.506377e-212
 [21] 3.026254e-210 2.478646e-208 1.930930e-206 1.433685e-204 1.016466e-202
 [26] 6.893381e-201 4.478804e-199 2.792025e-197 1.672223e-195 9.634643e-194
 [31] 5.346309e-192 2.860399e-190 1.477064e-188 7.368694e-187 3.554621e-185
 [36] 1.659500e-183 7.503970e-182 3.288999e-180 1.398319e-178 5.770499e-177
 [41] 2.312940e-175 9.009988e-174 3.413069e-172 1.257960e-170 4.513544e-169
 [46] 1.577304e-167 5.371162e-166 1.783095e-164 5.773302e-163 1.823892e-161
 [51] 5.624362e-160 1.693610e-158 4.981727e-157 1.431944e-155 4.023461e-154
 [56] 1.105459e-152 2.970921e-151 7.812256e-150 2.010598e-148 5.065927e-147
 [61] 1.249957e-145 3.020974e-144 7.153582e-143 1.660085e-141 3.776323e-140
 [66] 8.422446e-139 1.842182e-137 3.952247e-136 8.318815e-135 1.718188e-133
 [71] 3.483013e-132 6.930986e-131 1.354155e-129 2.598070e-128 4.895707e-127
 [76] 9.062187e-126 1.648057e-124 2.945100e-123 5.172265e-122 8.928470e-121
 [81] 1.515129e-119 2.527888e-118 4.147233e-117 6.691251e-116 1.061838e-114
 [86] 1.657538e-113 2.545504e-112 3.846261e-111 5.718816e-110 8.368014e-109
 [91] 1.205127e-107 1.708367e-106 2.384020e-105 3.275373e-104 4.430713e-103
 [96] 5.901843e-102 7.741778e-101  1.000160e-99  1.272653e-98  1.595132e-97
[101]  1.969532e-96  2.395753e-95  2.871213e-94  3.390500e-93  3.945182e-92
[106]  4.523809e-91  5.112148e-90  5.693663e-89  6.250226e-88  6.763023e-87
[111]  7.213599e-86  7.584955e-85  7.862619e-84  8.035576e-83  8.097001e-82
[116]  8.044698e-81  7.881228e-80  7.613709e-79  7.253302e-78  6.814447e-77
[121]  6.313909e-76  5.769735e-75  5.200194e-74  4.622797e-73  4.053469e-72
[126]  3.505903e-71  2.991147e-70  2.517409e-69  2.090067e-68  1.711865e-67
[131]  1.383224e-66  1.102658e-65  8.672091e-65  6.729021e-64  5.151503e-63
[136]  3.891157e-62  2.899975e-61  2.132494e-60  1.547273e-59  1.107739e-58
[141]  7.825385e-58  5.454777e-57  3.751943e-56  2.546523e-55  1.705514e-54
[146]  1.127151e-53  7.350747e-53  4.730481e-52  3.004038e-51  1.882492e-50
[151]  1.164097e-49  7.103526e-49  4.277471e-48  2.541720e-47  1.490372e-46
[156]  8.623556e-46  4.923798e-45  2.774172e-44  1.542350e-43  8.461444e-43
[161]  4.580512e-42  2.446733e-41  1.289605e-40  6.706851e-40  3.441643e-39
[166]  1.742574e-38  8.705371e-38  4.290877e-37  2.086694e-36  1.001190e-35
[171]  4.739246e-35  2.213232e-34  1.019668e-33  4.634393e-33  2.077867e-32
[176]  9.190109e-32  4.009483e-31  1.725469e-30  7.324244e-30  3.066479e-29
[181]  1.266261e-28  5.156991e-28  2.071297e-27  8.204341e-27  3.204661e-26
[186]  1.234351e-25  4.688070e-25  1.755609e-24  6.482146e-24  2.359638e-23
[191]  8.468085e-23  2.995815e-22  1.044746e-21  3.591265e-21  1.216746e-20
[196]  4.062952e-20  1.337043e-19  4.335935e-19  1.385560e-18  4.362574e-18
[201]  1.353333e-17  4.135985e-17  1.245183e-16  3.692611e-16  1.078563e-15
[206]  3.102654e-15  8.789419e-15  2.451817e-14  6.734078e-14  1.820917e-13
[211]  4.847107e-13  1.270027e-12  3.275199e-12  8.312107e-12  2.075807e-11
[216]  5.100555e-11  1.232971e-10  2.931858e-10  6.857013e-10  1.577158e-09
[221]  3.567039e-09  7.931878e-09  1.733886e-08  3.725467e-08  7.866723e-08
[226]  1.632283e-07  3.327504e-07  6.663384e-07  1.310549e-06  2.531166e-06
[231]  4.799783e-06  8.934662e-06  1.632336e-05  2.926396e-05  5.147098e-05
[236]  8.879917e-05  1.502383e-04  2.492199e-04  4.052441e-04  6.457745e-04
[241]  1.008254e-03  1.541965e-03  2.309307e-03  3.385897e-03  4.858802e-03
[246]  6.822155e-03  9.369568e-03  1.258307e-02  1.651891e-02  2.119121e-02
[251]  2.655562e-02  3.249549e-02  3.881406e-02  4.523558e-02  5.141794e-02
[256]  5.697742e-02  6.152417e-02  6.470468e-02  6.624527e-02  6.598950e-02
[261]  6.392279e-02  6.017909e-02  5.502745e-02  4.884022e-02  4.204805e-02
[266]  3.508915e-02  2.836153e-02  2.218543e-02  1.678098e-02  1.226268e-02
[271]  8.648759e-03  5.881339e-03  3.851906e-03  2.426842e-03  1.469011e-03
[276]  8.531712e-04  4.747200e-04  2.526617e-04  1.284082e-04  6.219877e-05
[281]  2.865586e-05  1.252874e-05  5.185402e-06  2.025998e-06  7.449720e-07
[286]  2.569126e-07  8.277155e-08  2.480263e-08  6.877315e-09  1.754174e-09
[291]  4.087313e-10  8.628102e-11  1.633599e-11  2.739922e-12  4.007368e-13
[296]  5.006785e-14  5.195264e-15  4.298150e-16  2.658012e-17  1.092160e-18
[301]  2.236327e-20

How many outcomes have a corresponding probability greater than \(0.05\)?

i <- 1
while(i < 301)
{ if (sampleSpaceProbs[i] >= 0.05)
  {
      print(
            paste(as.character(sampleSpace[i]), 
                  "    ", 
                  as.character(sampleSpaceProbs[i])
                  )
            );
  }
  i <- i+1;
}

[1] "254      0.051417944661263"
[1] "255      0.0569774199103519"
[1] "256      0.0615241671465016"
[1] "257      0.0647046827355092"
[1] "258      0.066245270419688"
[1] "259      0.0659894971748243"
[1] "260      0.063922793142426"
[1] "261      0.0601790937082499"
[1] "262      0.0550274482817531"

==> only 9 values have probabilities slightly higher than 5%

With an increasing number of possible outcomes, the probability of any particular outcome to occur will decrease!

Try this:

sampleSpace <- c(0,seq(1:3000))
sampleSpaceProbs <- dbinom(sampleSpace,3000,0.86)

i <- 1
while(i < 3001)
{ if (sampleSpaceProbs[i] >= 0.05)
  {
    print(
          paste(as.character(sampleSpace[i]), 
          "    ", 
          as.character(sampleSpaceProbs[i])
                  )
          );
  }
  i <- i+1;
}

==> none of the values has a probability of at least 5% to occur

The approach: check for the range of values around the expected value that add up to 95% of all possible occurrences. Values outside that range are considered unusual.

The Empirical Rule states that in a bell-shaped distribution about 95% of all observations lie between ?? - 2?? and ?? + 2??. Any observation that lies outside this intervall occurs less than 5% of the time and must be considered unusual.

Can we assume that the given binimial distribution is bell-shaped \(n*p*(1-p)>9\)? ==> TRUE ==> It’s bell-shaped distribution. Thus we can apply the Empirical Rule!

–> Expected Value or Mean � of the given binomial dist.

expV_Cellphones <- n*p

–> Standard Deviation ?? of the given binomial dist.

sd_Cellphones <- sqrt(n*p*(1-p))

Is 275 smaller than ?? - 2??

expV_Cellphones - 2*sd_Cellphones

[1] 245.98

275 < expV_Cellphones - 2*sd_Cellphones

Is 275 greater than ?? - 2??

expV_Cellphones + 2*sd_Cellphones

[1] 270.02

275 > expV_Cellphones + 2*sd_Cellphones

==> unusual result

Visualization of the result

The hist() function does not work here, since we do not have counts but probabilities/relative frequencies we can create a barplot instead, that looks like a histogram

barplot(dbinom(229:290,n,p), space = 0, width = 1, names.arg = as.character(229:290))

We create our own hist function for binomial probability distributions

hist.binom <- function(n, p, ...)
{
    # plots a Binomial probability  distribution
    k <- 0:n
    p <- dbinom(k, n, p)
    names(p) <- as.character(0:n)
    barplot(p,space=0, ...)
}

Example 8b

According to the American Red Cross, 7% of people in the United States have blood type 0-negative. What is the probability that in a single random sample of 500 people in the US fewer than 30 have blood type 0-negative?

n <- 500
p <- 0.07

pbinom(29,n,p)

[1] 0.1677677

The answer can also be approximated using the normal dist. if the given binomial dist. approx. the shape of a normal dist.

Can we assume that the given binimial distribution is normal \(n*p*(1-p)>9\)? ==> TRUE ==> approx. normal distribution

Determine the necessary parameters for the normal dist. –> Expected Value or Mean � of the given binomial dist.

expV_bloodType <- n*p

–> Standard Deviation ?? of the given binomial dist.

sd_bloodType <- sqrt(n*p*(1-p))

Probability that fewer than 30 have that blood type in a sample of 500

pnorm(29.5, expV_bloodType, sd_bloodType)

[1] 0.1675173

Visualisation of the result

The hist() function does not work here, since we do not have counts but probabilities/relative frequencies we can create a barplot instead, that looks like a histogram

bp <- barplot(dbinom(15:55,n,p), space = 0, width = 1, 
        #names.arg = as.character(15:55),
        col="yellow", main="Binom. Dist. (n=500, p=0.07) with a superimposed normal curve", 
        axes=FALSE, xlab="No of people with blood typ 0-negative", ylab="Prob. Density")
axis(1, labels = c(15:55), at = bp, tcl = -0.25) 
axis(2) 
bp

      [,1]
 [1,]  0.5
 [2,]  1.5
 [3,]  2.5
 [4,]  3.5
 [5,]  4.5
 [6,]  5.5
 [7,]  6.5
 [8,]  7.5
 [9,]  8.5
[10,]  9.5
[11,] 10.5
[12,] 11.5
[13,] 12.5
[14,] 13.5
[15,] 14.5
[16,] 15.5
[17,] 16.5
[18,] 17.5
[19,] 18.5
[20,] 19.5
[21,] 20.5
[22,] 21.5
[23,] 22.5
[24,] 23.5
[25,] 24.5
[26,] 25.5
[27,] 26.5
[28,] 27.5
[29,] 28.5
[30,] 29.5
[31,] 30.5
[32,] 31.5
[33,] 32.5
[34,] 33.5
[35,] 34.5
[36,] 35.5
[37,] 36.5
[38,] 37.5
[39,] 38.5
[40,] 39.5
[41,] 40.5

dbinom(15:55,n,p)

 [1] 0.0000464146 0.0001058989 0.0002269358 0.0004583453 0.0008751878
 [6] 0.0015842782 0.0027256399 0.0044667989 0.0069873349 0.0104528276
[11] 0.0149801382 0.0205992389 0.0272195439 0.0346097964 0.0423992463
[16] 0.0501040555 0.0571773648 0.0630757018 0.0673300747 0.0696084801
[21] 0.0697581758 0.0678204487 0.0640165118 0.0587089742 0.0523476213
[26] 0.0454101543 0.0383479404 0.0315442736 0.0252890575 0.0197702090
[31] 0.0150792131 0.0112265764 0.0081624479 0.0057981905 0.0040257790
[36] 0.0027332009 0.0018152188 0.0011797421 0.0007505927 0.0004676632
[41] 0.0002854437

# color the interesting area differently
barplot( dbinom(15:29, n, p),
        space=0, 
        col="skyblue",
        add=TRUE)

# add a vertical line for x = 29
abline(v = 14.5, col = "red", lty = 1)
text(x=12.5, y=0.06,
     pos=4, paste("29"),cex=0.75, col = "red")

xy <- xy.coords(x=0.5:39.5, y= dnorm(15:54,expV_bloodType,sd_bloodType))
lines(xy, 
      lwd=2, col=2)

Example 9

Create a plot for a uniform distribution

xUnif <- seq(-1, 32, length=4000)
hxUnif <- dunif(xUnif,min=0, max=30)
plot(xUnif, hxUnif, type="l", lty=1, xlab="x value",xlim = range(-2,32),
     ylab="Density",ylim = range(0,0.04), main="Uniform Distribution",col="blue")

Example 10

Create a uniform population and simulate 1000 samples of size 3, 12 and 30 and calculate the sampling means.

# Create a uniform population with 10000 individuals  
dataUnif <- runif(10000,min=0, max=30)
n_Hist <- hist(dataUnif, breaks=16, xlab="x",
                main="Uniform Distribution of x", freq=F, col= "grey",
                xlim = range(-2,32))

N <- 1000 # number of samples
n1 <- 3   # sample size n1
n2 <- 12  # sample size n2
n3 <- 30  # sample size n3

vec_n1 <- NULL
vec_n2 <- NULL
vec_n3 <- NULL

while (N > 0) {
  N<-N-1;
  samp_n1<-sample(dataUnif, n1);
  samp_n2<-sample(dataUnif, n2);
  samp_n3<-sample(dataUnif, n3);

  vec_n1<-c(vec_n1,mean(samp_n1));
  vec_n2<-c(vec_n2,mean(samp_n2));
  vec_n3<-c(vec_n3,mean(samp_n3));
}

Create frequency histograms for the sampling means.

Version 1: A diagram with a plot of x in the population is added

par(mfrow=c(2,2), font.main= 1, cex.main=1) # Splits graphics output into a table of 2 rows and 2 columns
plot(xUnif, hxUnif, type="l", lty=1, xlab="x value",xlim = range(-2,32),
     ylab="Density",ylim = range(0,0.04), main="Uniform Distribution",col="blue")
n1_Hist <- hist(vec_n1, breaks=16, xlab="Sample Means",
                main="Distribution of the sample means with n = 3", freq=F, col= "grey",
                xlim = range(-2,32))
n2_Hist <- hist(vec_n2, breaks=16, xlab="Sample Means",
                main="Distribution of the sample means with n = 12", freq=F, col= "grey",
                xlim = range(-2,32))
n3_Hist <- hist(vec_n3, breaks=16, xlab="Sample Means",
                main="Distribution of the sample means with n = 30", freq=F, col= "grey",
                xlim = range(-2,32))

par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table

Version 2: A histogram of the population data of x is added

par(mfrow=c(2,2), font.main= 1, cex.main=1) # Splits graphics output into a table of 2 rows and 2 columns
n_Hist <- hist(dataUnif, breaks=16, xlab="x",
               main="Uniform Distribution of X", freq=F, col= "grey",
               xlim = range(-2,32))
n1_Hist <- hist(vec_n1, breaks=16, xlab="Sample Means",
                main="Distribution of the sample means with n = 3", freq=F, col= "grey",
                xlim = range(-2,32))
n2_Hist <- hist(vec_n2, breaks=16, xlab="Sample Means",
                main="Distribution of the sample means with n = 12", freq=F, col= "grey",
                xlim = range(-2,32))
n3_Hist <- hist(vec_n3, breaks=16, xlab="Sample Means",
                main="Distribution of the sample means with n = 30", freq=F, col= "grey",
                xlim = range(-2,32))

par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table

Example 11

Create histograms of 1000 sampling means from samples with \(n = 9\) and \(n = 25\) that are taken from a population that is not normal. Make sure that each is appropriately labelled. Determine the mean and the sd of the frequency dist.

#Generate some non-normally distributed data (f distribution, Fisher-Dist.).
population <- rf(10000,10,10)
hist(population)

# Simulate 1000 samples of size 4, 10 and 35 and calculate the sampling means of scores that are not normally distr. 
N <- 1000
n1 <- 4
n2 <- 10
n3 <- 35
vec_n1 <- NULL
vec_n2 <- NULL
vec_n3 <- NULL

while (N>0)
{
  N<-N-1;
  samp_n1<-rf(n1,3,27);
  samp_n2<-rf(n2,3,27);
  samp_n3<-rf(n3,3,27);
  
  vec_n1<-c(vec_n1,mean(samp_n1));
  vec_n2<-c(vec_n2,mean(samp_n2));
  vec_n3<-c(vec_n3,mean(samp_n3));
}

par(mfrow=c(2,2), font.main= 1, cex.main=1) # Splits graphics output into a table of 2 rows and 2 columns

pop_Hist <- hist(population, breaks=16, 
                 xlab="Scores from a Fisher Distribution (Population)",
                 xlim = range(0 ,6),
                 main="Fisher Distribution", 
                 freq = F  )
n1_Hist <- hist(vec_n1, breaks=16, ylab="", xlab="Sample Means",
                xlim = range(0,5),main="Distribution of the sample means with n = 4", freq=F)
n2_Hist <- hist(vec_n2, breaks=16,  ylab="", xlab="Sample Means",
                xlim = range(0,4),main="Distribution of the sample means with n = 10", freq=F)
n3_Hist <- hist(vec_n3, breaks=16,  ylab="", xlab="Sample Means",
                xlim = range(0, 3),ylim=range(0,2.7), 
                main="Distribution of the sample means with n = 35", freq=F)

par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table

Example 12

Simulate 1000 samples of size 9 and 25 and calculate the sampling means of normally distributed IQ Scores (\(�=100\), \(Sigma=15\))

N <- 1000
n1 <- 9
n2 <- 25
vec_n1 <- NULL
vec_n2 <- NULL

while (N>0)
{
  N<-N-1;
  samp_n1<-rnorm(n1, mean=100, sd=15);
  samp_n2<-rnorm(n2, mean=100, sd=15);
  
  vec_n1<-c(vec_n1,mean(samp_n1));
  vec_n2<-c(vec_n2,mean(samp_n2));
}

par(mfrow=c(1,2)) # Splits graphics output into a table of 3 rows and 2 columns
hist(vec_n1, breaks=16, xlab="Sample Means, n=9",
     xlim = range(80,120),main="")
hist(vec_n1, breaks=16, xlab="Sample Means, n=25",
     xlim = range(80,120),main="")

par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table

Example 13

Create histograms for a normally distributed variable (IQ Scores) and the means of 1000 simulated sample with \(n = 9\). Make sure that each is appropriately labelled. Determine the mean and the sd of the frequency dist.

# simulate a population of size 10000
population <-rnorm(10000, mean=100, sd=15);

par(mfrow=c(1,2)) # Splits graphics output into a table of 3 rows and 2 columns

pop_Hist <- hist(population, breaks=16, xlab="IQ Scores (Population)",
                 xlim = range(40,160),ylim=c(0,2800),main="")
mean_pop <- round(mean(population), digits=2)
sd_pop <- round(sd(population), digits=2)
text(c(70),c(2750),paste("mean=",mean_pop),cex=0.75)
text(c(68),c(2600),paste("sd=",sd_pop),cex=0.75)


n1_Hist <- hist(vec_n1, breaks=16, xlab="Sample Means, n=9",
                xlim = range(40,160), ylim=c(0,180),main="")
mean_n1 <- round(mean(vec_n1), digits=2)
sd_n1 <- round(sd(vec_n1), digits=2)

# if we would want to superimpose a normalcurve (density vs. random variable) 
# we first would have to change our histograms to freq=FALSE)
# otherwise we are going to get a line that looks like it is attached to the 
# very bottom of the bars 
#lines(n1_Hist$mids, dnorm(n1_Hist$mids,mean_n1,sd_n1), lwd=2, col=3)
text(c(72),c(175),paste("mean=",mean_n1),cex=0.75)
text(c(70),c(165),paste("sd=",sd_n1),cex=0.75)

par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table

Example 14

Create histograms with superimposed density curves for a normaly distributed variable (IQ Scores) and the means of 1000 simulated sample with n= 9. Make sure that each is appropriately labelled. Determine the mean and the sd of the frequency dist.

# Version 2 with densities and superposed normal curves
population <-rnorm(10000, mean=100, sd=15);

par(mfrow=c(1,2)) # Splits graphics output into a table of 3 rows and 2 columns

if we want to superimpose a normalcurve (density vs. random variable), we have to change our histograms to freq=FALSE otherwise we are going to get a line that looks like it is attached to the very bottom of the bars

pop_Hist <- hist(population, breaks=16, xlab="IQ Scores (Population)",xlim = range(40,160),ylim=c(0, 0.028), main="", freq=F)
lines(pop_Hist$mids, dnorm(pop_Hist$mids,100,15), lwd=2, col=2)
mean_pop <- round(mean(population), digits=2)
sd_pop <- round(sd(population), digits=2)
text(c(68),c(0.028),paste("mean=",mean_pop),cex=0.75)
text(c(66),c(0.026),paste("sd=",sd_pop),cex=0.75)

n1_Hist <- hist(vec_n1, breaks=16, xlab="Sample Means, n=9",xlim = range(40,160),ylim=c(0,0.09), main="", freq=F)
lines(pop_Hist$mids, dnorm(pop_Hist$mids,100,15), lwd=2, col=2)
mean_n1 <- round(mean(vec_n1), digits=2)
sd_n1 <- round(sd(vec_n1), digits=2)
lines(n1_Hist$mids, dnorm(n1_Hist$mids,mean_n1,sd_n1), lwd=2, col=3)
text(c(68),c(0.09),paste("mean=",mean_n1),cex=0.75)
text(c(66),c(0.083),paste("sd=",sd_n1),cex=0.75)

par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table

Example 15

Create histograms of 1000 sampling means from samples with \(n = 9\) and \(n = 25\) and a normally distributed population. Make sure that each is appropriately labelled. Determine the mean and the sd of the frequency dist.

par(mfrow=c(1,2)) # Splits graphics output into a table of 3 rows and 2 columns

n1_Hist <- hist(vec_n1, breaks=16, freq=FALSE, xlab="n=9",xlim = range(70,120),main="")
mean_n1 <- round(mean(vec_n1), digits=2)
sd_n1 <- round(sd(vec_n1), digits=2)
text(c(80),c(0.08),paste("mean=",mean_n1),cex=0.75)
text(c(80),c(0.073),paste("sd=",sd_n1),cex=0.75)
lines(n1_Hist$mids, dnorm(n1_Hist$mids,mean_n1,sd_n1), lwd=2, col=2)

n2_Hist <- hist(vec_n2, breaks=16, freq=FALSE, ylab="", xlab="n=25",xlim = range(70,120),main="")
mean_n2 <- round(mean(vec_n2), digits=2)
sd_n2 <- round(sd(vec_n2), digits=2)
text(c(82),c(0.11),paste("mean=",mean_n2),cex=0.75)
text(c(80),c(0.1),paste("sd=",sd_n2),cex=0.75)
lines(n2_Hist$mids, dnorm(n2_Hist$mids,mean_n2,sd_n2), lwd=2, col=3)
lines(n1_Hist$mids, dnorm(n1_Hist$mids,mean_n1,sd_n1), lwd=2, col=2)

# Text is written in one of the four margins of the current figure region or ...
mtext("Hist. of the Sampling Dist. of the Mean",side=3, line=2, outer=F,col=2,cex=1.5,adj = 1.5)

par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table

Example 16

Create histograms of 1000 sampling means from samples with n= 9 and n=25 and a population that is not normaly distributed. Make sure that each is appropriately labelled. Determine the mean and the sd of the frequency dist.

#Generate some non-normally distributed data (f distribution, Fisher-Dist.).
population <- rf(10000,3,27)
hist(population)

par(mfrow=c(1,4)) # Splits graphics output into a table of 3 rows and 2 columns

pop_Hist <- hist(population, breaks=16, 
                 xlab="Scores from a Fisher Distribution (Population)",
                 xlim = range(0 ,9),
                 main="", 
                 freq = F  )
lines(pop_Hist$mids, df(pop_Hist$mids,3,27), lwd=2, col=2)
mean_pop <- round(mean(population), digits=2)
sd_pop <- round(sd(population), digits=2)
text(c(6),c(0.3),paste("mean=",mean_pop),cex=0.75)
text(c(6),c(0.26),paste("sd=",sd_pop),cex=0.75)

# Simulate 1000 samples of size 4, 10 and 35 and calculate the sampling means of scores that are not normally distr. 
N <- 1000
n1 <- 4
n2 <- 10
n3 <- 35
vec_n1 <- NULL
vec_n2 <- NULL
vec_n3 <- NULL

while (N>0)
{
  N<-N-1;
  samp_n1<-rf(n1,3,27);
  samp_n2<-rf(n2,3,27);
  samp_n3<-rf(n3,3,27);
  
  vec_n1<-c(vec_n1,mean(samp_n1));
  vec_n2<-c(vec_n2,mean(samp_n2));
  vec_n3<-c(vec_n3,mean(samp_n3));
}


n1_Hist <- hist(vec_n1, breaks=16, ylab="", xlab="Sample Means, n=4",xlim = range(0,3),main="", freq=F)
mean_n1 <- round(mean(vec_n1), digits=2)
sd_n1 <- round(sd(vec_n1), digits=2)
text(c(2.3),c(0.4),paste("mean=",mean_n1),cex=0.75)
text(c(2.3),c(0.3),paste("sd=",sd_n1),cex=0.75)

n2_Hist <- hist(vec_n2, breaks=16,  ylab="", xlab="Sample Means, n=10",xlim = range(0,3),main="", freq=F)
mean_n2 <- round(mean(vec_n2), digits=2)
sd_n2 <- round(sd(vec_n2), digits=2)
text(c(2.3),c(0.4),paste("mean=",mean_n2),cex=0.75)
text(c(2.3),c(0.3),paste("sd=",sd_n2),cex=0.75)


n3_Hist <- hist(vec_n3, breaks=16,  ylab="", xlab="Sample Means, n=35",xlim = range(0.5,2),ylim=range(0,2.7) , main="", freq=F)
mean_n3 <- round(mean(vec_n3), digits=2)
sd_n3 <- round(sd(vec_n3), digits=2)
text(c(1.7),c(1.4),paste("mean=",mean_n3),cex=0.75)
text(c(1.7),c(1.3),paste("sd=",sd_n3),cex=0.75)
# create theoretical scores to draw a normal curve into the plot
temp_x <- seq(-4, 4, length=10000)
lines(temp_x, dnorm(temp_x,mean_n3,sd_n3), lwd=2, col=3)

# Text is written in one of the four margins of the current figure region or ...
mtext("Population Scores Following a Fisher Distribution \r with 3 sampling Distributions of the Mean (n=4, n=10 and n=35)",
      side=3, line=2, outer=F,col=2,cex=0.75,adj = 1)

par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table

Example 17

Display the Student’s t distributions with various degrees of freedom and compare to the normal distribution

x <- seq(-4, 4, length=100)
hx <- dnorm(x)

degf <- c(1, 3, 8, 30)
colors <- c("red", "blue", "darkgreen", "gold", "black")
labels <- c("df=1", "df=3", "df=8", "df=30", "normal")

plot(x, hx, type="l", lty=2, xlab="x value",
     ylab="Density", main="Comparison of t Distributions")

for (i in 1:4){
  lines(x, dt(x,degf[i]), lwd=2, col=colors[i])
}

legend("topright", inset=.05, title="Distributions",
       labels, lwd=2, lty=c(1, 1, 1, 1, 2), col=colors)

Example 18

Assesing Normality in R

#Generate some normally distributed data.
y <- rnorm(1000,17,3)
hist(y)

qqnorm(y)
qqline(y, col="green")

#Perform the Shapiro-Wilks test for normality
shapiro.test(y)


    Shapiro-Wilk normality test

data:  y
W = 0.99807, p-value = 0.311

#Perform Anderson-Darling test for normality
library(nortest)
ad.test(y)


    Anderson-Darling normality test

data:  y
A = 0.49311, p-value = 0.2165

#Generate some non-normally distributed data (f distribution, Fisher-Dist.).
y <- rf(1000,3,27)
hist(y)

qqnorm(y)
qqline(y, col="green")

#Perform the Shapiro-Wilks test for normality
shapiro.test(y)


    Shapiro-Wilk normality test

data:  y
W = 0.82243, p-value < 2.2e-16

#Perform Anderson-Darling test for normality
library(nortest)
ad.test(y)


    Anderson-Darling normality test

data:  y
A = 36.413, p-value < 2.2e-16

## The following data represent the finishing times 
# (in seconds) for six randomly selected races of a 
# greyhound named Barbies Bomber. 
# Is there evidence to support the belief that the 
# variable "finishing time" is normally distributed?
finishTimes <- c(31.35,32.06,31.91,32.52,31.26,32.37)
qqnorm(finishTimes)
qqline(finishTimes, col="green")

Example 19

1 - Construct a Poisson probability histogram with lambda=1. 2 - Construct a Poisson probability histogram with lambda=10. 3 - Construct a Poisson probability histogram with lambda=30.

To use the hist.pois() function, you must specify the values of lambda (l, the average number of events/observations/… per unit of time/space/…) and the maximum number of counts per unit you want to be considered. For example, to get the histogram of a poisson(lambda=1) distribution, use hist.pois(1,15)

hist.pois <- function(l, maxCounts,...)
{
  # plots a Poisson probability distriburion distribution
  k <- 0:maxCounts
  p <- dpois(k,l)
  names(p) <- as.character(0:maxCounts)
  barplot(p,space=0, ...)
}

par(mfrow=c(1,3)) # Splits graphics output into a table of  3 columns
hist.pois(1,15,col="yellow", main="Poisson Dist. with lambda=1")
hist.pois(10,20,col="yellow", main="Poisson Dist. with lambda=10")
hist.pois(30,60,col="yellow", main="Poisson Dist. with lambda=30")

par(mfrow=c(1,1)) # back to default

Example 20

Construct a standard normal probability distribution and plot its density function

x <- seq(-4, 4, length = 1000)  
y <- dnorm(x)
a <- -1
a1 <- pnorm(a)
plot(x, y, axes = FALSE, type = 'l', xlab = '', ylab = '',
        main = expression(italic(P(X<=a))));
abline(h = 0)

x1 <- x[x <= a]
y1 <- dnorm(x1)
xcoord <- c(-4, x1, x1[length(x1)], -4) 
ycoord <- c(0, y1, 0, 0)
polygon(xcoord, ycoord, col = 'grey90')

axis(1, at = c(-1), font = 8,labels = c('a'))
labText <- paste("P(X<=a) = \r", as.character(round(a1, digits=4)) )
text(-2.5,0.2, labels = labText)

Example 21

Construct a uniform probability distribution and plot its density function

x <- seq(0, 10, length = 1000)  
y <- dunif(x,0,10)
plot(x, y, axes = TRUE, xaxt="n", type = 'l', xlab = 'X', ylab = 'Density', ylim=c(0,0.15),
     main = "Uniform Distribution", bty="n")
#abline(v = 0)
pts <- list(x=c(0,0),y=c(0,0.1))
lines(pts)
lines(list(x=c(10,10),y=c(0,0.1)))
axis(1,pos=c(0,0))

Simulate 1000 samples of size 3, 12 and 30 and calculate the sampling means of scores that are not normally distr.

N <- 1000
n1 <- 3
n2 <- 12
n3 <- 30
n4 <- 1

vec_n1 <- NULL
vec_n2 <- NULL
vec_n3 <- NULL
vec_n4 <- NULL

while (N>0)
{
  N<-N-1;
  samp_n1<-sample(x, n1);
  samp_n2<-sample(x, n2);
  samp_n3<-sample(x, n3);
  samp_n4<-sample(x, n4);
  
  vec_n1<-c(vec_n1,mean(samp_n1));
  vec_n2<-c(vec_n2,mean(samp_n2));
  vec_n3<-c(vec_n3,mean(samp_n3));
  vec_n4<-c(vec_n4,mean(samp_n4));
}


n1_Hist <- hist(vec_n1, breaks=16, xlab="Sample Means",main="Distribution of the sample mean with n = 3", freq=F, col= "grey")

n2_Hist <- hist(vec_n2, breaks=16, xlab="Sample Means",main="Distribution of the sample mean with n = 12", freq=F, col= "grey")

n3_Hist <- hist(vec_n3, breaks=16, xlab="Sample Means",main="Distribution of the sample mean with n = 30", freq=F, col= "grey")

n4_Hist <- hist(vec_n4, breaks=16, xlab="Sample Means",main="Distribution of the sample mean with n = 1", freq=F, col= "grey")

#Setup up coordinate system (with x == y aspect ratio):
plot(c(-2,3), c(-1,5), type = "n", xlab = "x", ylab = "y", asp = 1)
#the x- and y-axis, and an integer grid
abline(h = 0, v = 0, col = "gray60")
text(1,0, "abline( h = 0 )", col = "gray60", adj = c(0, -.1))
abline(h = -1:5, v = -2:3, col = "lightgray", lty = 3)
abline(a = 1, b = 2, col = 2)
text(1,3, "abline( 1, 2 )", col = 2, adj = c(-.1, -.1))

pts <- list(x = cars[,1], y = cars[,2])
plot(pts)

Example 22

Visualization the mean weight gain during pregnancy is 30 pounds, with a standard deviation of 12.9 pounds. Weight gain during pregnancy is skewed right. In a certain neighborhood the mean of a sample with 35 persons is 36.2. Is this result unusual?

With a sample size n > 29 we can assume that the sampling distribution of the sample mean is normal with: - mean of the sampling distr.= mean of the population = 30 - SD of the sampling distr.= SD of the populatio/sqrt(n)

Construct a standard normal probability distribution and plot its density function

n <- 35
mean_of_Sample <- 36.2
mean_of_SampDist <- 30
sd_of_SampDist <- 12.9/sqrt(n)

# create a range of possible sample mean values
x <- seq(19,41, length = 1000)  
# determine the corresponding probability densities 
y <- dnorm(x, mean=mean_of_SampDist, sd=sd_of_SampDist)

# dertermine the lower and upper bound (quantiles) that 
# delimit the range of possible sample mean values that represent 95% of the possible sample mean values
l <- mean_of_SampDist - 1.96 * sd_of_SampDist
u <- mean_of_SampDist + 1.96 * sd_of_SampDist

# plot the density curve without axis, titles, legends, ...
plot(x, y, 
     axes = FALSE, 
     type = 'l', 
     xlab = '', 
     ylab = '',
     ylim = c(0, 0.22),
     main = ''
     );

# add a horizontal axis at probability density = 0
abline(h = 0)

# add a diagram title 
text(mean_of_SampDist, 
     0.21, 
     # expression( paste("Sampling Distribution of the Sample Mean ", 
     #                   bar(x) , 
     #                   ", with n = 35  -->  " , 
     #                   bar(x) ,
     #                   " ~ N(" ,
     #                   mu[bar(x)] == mu ,
     #                  ", " ,
     #                  sigma[bar(x)] == sigma/sqrt(n),
     #                  ")"
     #                  )
     #            ) 
     
     expression( atop("Sampling Distribution of the Sample Mean "~bar(x)~"," ,
                 "with n = 35  -->  "~bar(x)~" ~ N("~mu[bar(x)]==mu~", "~sigma[bar(x)]==sigma/sqrt(n)~")" )
                ) 
      )

# add a title for the axis
text(40, 0,
     labels = expression( bar(x) ), 
     cex=1.0,
     pos=3
)

# add a tic mark and label for the center of the distribution
axis(1, 
     at = c(mean_of_SampDist), 
     font = 8,
     labels = expression( paste("", mu[bar(x)] == mu , "\n" , "= 30") ), 
     cex=0.3
)

# fill the central 95% under ther curve with blue color
x3 <- x[x > l & x < u]
y3 <- dnorm(x3, mean=mean_of_SampDist, sd=sd_of_SampDist)
xcoord <- c(l, x3, x3[length(x3)], l) 
ycoord <- c(0, y3, 0, 0)
polygon(xcoord, ycoord, col = 'cornflowerblue')

# label the blue part
text(mean_of_SampDist,0.05, 
     labels = "95%",
     cex=1.8,
     col="white"
)


text(mean_of_SampDist,0.02, 
     labels = expression(paste("P(", mu[bar(x)] - 1.96 * sigma[bar(x)] <= bar(x), " and " , bar(x) <= mu[bar(x)] + 1.96 * sigma[bar(x)] , ") = " , 0.95 , "" )),
     cex=0.8,
     col="white"
)


a1 <- pnorm(l, mean=mean_of_SampDist, sd=sd_of_SampDist)
a2 <- pnorm(u, mean=mean_of_SampDist, sd=sd_of_SampDist)

# add a tic mark and label for the lower and upper bound of the central 95% of the distribution
axis(1, 
     at = c(l), 
     font = 8,
     labels = expression( paste("", mu[bar(x)] - 1.96 * sigma[bar(x)] ,"") ), 
     cex=0.3
     )
axis(1, 
     at = c(u), 
     font = 8,
     labels = expression( paste("", mu[bar(x)] + 1.96 * sigma[bar(x)] ,"") ), 
     cex=0.3
     )

 
# add text that reports the probability for a mean to occur in the tails of the distribution  
text(l,0.05, 
     labels = expression(paste("P(",bar(x) <= mu[bar(x)] - 1.96 * sigma[bar(x)] , ") = ", 0.025 , "" ) ),
     pos=2,
     cex=0.8)

text(u,0.05, 
     labels = expression(paste("P(",bar(x) >= mu[bar(x)] + 1.96 * sigma[bar(x)]  , ") = ", 0.025 , ""  )),
     pos=4, 
     cex=0.8)

# fill the tails with grey color
x1 <- x[x <= l]
y1 <- dnorm(x1, mean=mean_of_SampDist, sd=sd_of_SampDist)
xcoord <- c(l, x1, x1[length(x1)], l) 
ycoord <- c(0, y1, 0, 0)
polygon(xcoord, ycoord, col = 'grey90')

x2 <- x[x >= u]
y2 <- dnorm(x2, mean=mean_of_SampDist, sd=sd_of_SampDist)
xcoord <- c(u, x2, x2[length(x2)], u) 
ycoord <- c(0, y2, 0, 0)
polygon(xcoord, ycoord, col = 'grey90')


# label the tails
text(l,0.005, 
     labels = "2,5%",
     cex=1.0,
     pos=2
)

text(u,0.005, 
     labels = "2,5%",
     cex=1.0,
     pos=4
)

# plot a line that represents the mean of the current sample
abline(v=36.2)
axis(1, 
     at = c(36.2), 
     font = 8,
     cex=0.3,
     col="red",
     lty=1,
     tcl= 2,
     labels=""
    )
# add a lablel to the line that represents the mean of the current sample
text(36.2, 0.02,
     labels = expression( bar(x)==36.2 ), 
     cex=1.0,
     pos=4
)