<- seq(-4, 4, length=100)
x <- dnorm(x)
hx
<- c(1, 3, 8, 30)
degf <- c("red", "blue", "darkgreen", "gold", "black")
colors <- c("df=1", "df=3", "df=8", "df=30", "normal")
labels
plot(x, hx, type="l", lty=2, xlab="x value",
ylab="Density", main="Comparison of t Distributions")
for (i in 1:4){
lines(x, dt(x,degf[i]), lwd=2, col=colors[i])
}
legend("topright", inset=.05, title="Distributions",
lwd=2, lty=c(1, 1, 1, 1, 2), col=colors) labels,
Probability Distributions in R
Some of the scripts are copied from this website
Example 1
Display the Student’s t distributions with various degrees of freedom and compare them to the normal distribution
Example 2
Children’s IQ scores are normally distributed with a mean of 100 and a standard deviation of 15. What proportion of children are expected to have an IQ between 80 and 120?
=100; sd=15
mean=80; ub=120
lb
<- seq(-4,4,length=100)*sd + mean
x <- dnorm(x,mean,sd)
hx
plot(x, hx, type="n", xlab="IQ Values", ylab="",
main="Normal Distribution", axes=FALSE)
<- x >= lb & x <= ub
i lines(x, hx)
polygon(c(lb,x[i],ub), c(0,hx[i],0), col="red")
<- pnorm(ub, mean, sd) - pnorm(lb, mean, sd)
area <- paste("P(",lb,"< IQ <",ub,") =",
result signif(area, digits=3))
mtext(result,3)
axis(1, at=seq(40, 160, 20), pos=0)
Example 3
Cumulative Sum
<-c(20:40) # egg cluster sizes to be examined
m<-c(rep(0,6),1/30*((26:30)-25),(36-((31:35)))/30,rep(0,5)) # PMF
pm<-cumsum(pm) # CDF
cm
barplot(pm, xlab="Cluster Size",, ylab="Probability",names.arg=m, main="CDF")
barplot(cm, xlab="Cluster Size",, ylab="Probability",names.arg=m, main="CDF")
<-cumsum(m)
cmEasy m
[1] 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
cmEasy
[1] 20 41 63 86 110 135 161 188 216 245 275 306 338 371 405 440 476 513 551
[20] 590 630
barplot(cmEasy, xlab="Cluster Size")
Example 4
According to a study 86% of the countries households own a cellular phone (one or more). In a random sample of 20 households, what is the probability that exactly 15 own a cell phone?
dbinom(15,20,0.86)
[1] 0.08680819
Example 5
In a random sample of 20 households, what is the probability that the number of households owning a cell phone is between 15 and 17?
dbinom(15,20,0.86)+dbinom(16,20,0.86)+dbinom(17,20,0.86)
[1] 0.4943078
pbinom(17, size=20, prob=0.86)-pbinom(14, size=20, prob=0.86)
[1] 0.4943078
Example 6
1 - Construct a binomial probability histogram with \(n=10\) and \(p=0,2\). 2 - Construct a binomial probability histogram with \(n=10\) and \(p=0,5\). 3 - Construct a binomial probability histogram with \(n=10\) and \(p=0,8\). Comment on the shape of the distribution.
First of all, we define a function for plotting a Binomial probability.
<- function(n, p, ...)
hist.binom
{# plots a Binomial probability distribution
<- 0:n
k <- dbinom(k, n, p)
p names(p) <- as.character(0:n)
barplot(p,space=0, ...)
}
To use the hist.binom()
function, you must specify the values of n and p.
<- function(n, p, ...)
hist.binom
{# plots a Binomial probability distribution
<- 0:n
k <- dbinom(k, n, p)
p names(p) <- as.character(0:n)
barplot(p,space=0, ...)
}
For example, to get the histogram of a binomial \((6,1/3)\) distribution, use hist.binom(6,1/3)
.
Try experimenting with the col
(color) argument; use help(colors)
or colors()
to see what colors R knows by name.
<- function(n, p, ...)
hist.binom
{# plots a Binomial probability distribution
<- 0:n
k <- dbinom(k, n, p)
p names(p) <- as.character(0:n)
barplot(p,space=0, ...)
}
An alternative way to plot a Binomial probability distribution
<-300
n<- 0:n
k <- dbinom(k, n, 0.2)
probs names(probs) <- as.character(0:n)
probs
0 1 2 3 4
8.452712e-30 6.339534e-28 2.369401e-26 5.884012e-25 1.092220e-23
5 6 7 8 9
1.616485e-22 1.986930e-21 2.086276e-20 1.910247e-19 1.549422e-18
10 11 12 13 14
1.127205e-17 7.429304e-17 4.473060e-16 2.477387e-15 1.269661e-14
15 16 17 18 19
6.052051e-14 2.695054e-13 1.125581e-12 4.424160e-12 1.641596e-11
20 21 22 23 24
5.766106e-11 1.922035e-10 6.093726e-10 1.841365e-09 5.313105e-09
25 26 27 28 29
1.466417e-08 3.877545e-08 9.837476e-08 2.397885e-07 5.622626e-07
30 31 32 33 34
1.269776e-06 2.764836e-06 5.810475e-06 1.179703e-05 2.316034e-05
35 36 37 38 39
4.400464e-05 8.098076e-05 1.444522e-04 2.499403e-04 4.197715e-04
40 41 42 43 44
6.847522e-04 1.085583e-03 1.673607e-03 2.510410e-03 3.665769e-03
45 46 47 48 49
5.213539e-03 7.225284e-03 9.761821e-03 1.286323e-02 1.653844e-02
50 51 52 53 54
2.075574e-02 2.543596e-02 3.044978e-02 3.562050e-02 4.073270e-02
55 56 57 58 59
4.554656e-02 4.981655e-02 5.331245e-02 5.584020e-02 5.725986e-02
60 61 62 63 64
5.749845e-02 5.655585e-02 5.450342e-02 5.147545e-02 4.765501e-02
65 66 67 68 69
4.325608e-02 3.850447e-02 3.361957e-02 2.879912e-02 2.420796e-02
70 71 72 73 74
1.997156e-02 1.617415e-02 1.286070e-02 1.004192e-02 7.701063e-03
75 76 77 78 79
5.801468e-03 4.293849e-03 3.122800e-03 2.232001e-03 1.568051e-03
80 81 82 83 84
1.082935e-03 7.353265e-04 4.909650e-04 3.223806e-04 2.082041e-04
85 86 87 88 89
1.322709e-04 8.266929e-05 5.083686e-05 3.076208e-05 1.831899e-05
90 91 92 93 94
1.073696e-05 6.194403e-06 3.518017e-06 1.967063e-06 1.082931e-06
95 96 97 98 99
5.870626e-07 3.134058e-07 1.647804e-07 8.533270e-08 4.352829e-08
100 101 102 103 104
2.187297e-08 1.082820e-08 5.281402e-09 2.538150e-09 1.201960e-09
105 106 107 108 109
5.609148e-10 2.579679e-10 1.169294e-10 5.223928e-11 2.300445e-11
110 111 112 113 114
9.986024e-12 4.273299e-12 1.802798e-12 7.498363e-13 3.074987e-13
115 116 117 118 119
1.243364e-13 4.957378e-14 1.949055e-14 7.556717e-15 2.889333e-15
120 121 122 123 124
1.089519e-15 4.051931e-16 1.486262e-16 5.377125e-17 1.918853e-17
125 126 127 128 129
6.754363e-18 2.345265e-18 8.032994e-19 2.714273e-19 9.047578e-20
130 131 132 133 134
2.975261e-20 9.652565e-21 3.089552e-21 9.756481e-22 3.039799e-22
135 136 137 138 139
9.344567e-23 2.834290e-23 8.482181e-24 2.504702e-24 7.297873e-25
140 141 142 143 144
2.098138e-25 5.952166e-26 1.666187e-26 4.602405e-27 1.254475e-27
145 146 147 148 149
3.374105e-28 8.955245e-29 2.345421e-29 6.061646e-30 1.545923e-30
150 151 152 153 154
3.890573e-31 9.662020e-32 2.367831e-32 5.726126e-33 1.366462e-33
155 156 157 158 159
3.217798e-34 7.477254e-35 1.714530e-35 3.879394e-36 8.661541e-37
160 161 162 163 164
1.908246e-37 4.148360e-38 8.898489e-39 1.883422e-39 3.933367e-40
165 166 167 168 169
8.105120e-41 1.647878e-41 3.305624e-42 6.542381e-43 1.277506e-43
170 171 172 173 174
2.461078e-44 4.677488e-45 8.770291e-46 1.622250e-46 2.960141e-47
175 176 177 178 179
5.328253e-48 9.460677e-49 1.656955e-49 2.862436e-50 4.877335e-51
180 181 182 183 184
8.196632e-52 1.358558e-52 2.220719e-53 3.579848e-54 5.690791e-55
185 186 187 188 189
8.920700e-56 1.378872e-56 2.101489e-57 3.157822e-58 4.678255e-59
190 191 192 193 194
6.832715e-60 9.837679e-61 1.396233e-61 1.953279e-62 2.693311e-63
195 196 197 198 199
3.660140e-64 4.901973e-65 6.469609e-66 8.413759e-67 1.078145e-67
200 201 202 203 204
1.361158e-68 1.692983e-69 2.074323e-70 2.503493e-71 2.975966e-72
205 206 207 208 209
3.484058e-73 4.016814e-74 4.560152e-75 5.097285e-76 5.609452e-77
210 211 212 213 214
6.076907e-78 6.480114e-79 6.801063e-80 7.024572e-81 7.139460e-82
215 216 217 218 219
7.139460e-83 7.023774e-84 6.797201e-85 6.469812e-86 6.056217e-87
220 221 222 223 224
5.574473e-88 5.044772e-89 4.488029e-90 3.924509e-91 3.372625e-92
225 226 227 228 229
2.847995e-93 2.362827e-94 1.925652e-95 1.541366e-96 1.211554e-97
230 231 232 233 234
9.350040e-99 7.083363e-100 5.266725e-101 3.842675e-102 2.750633e-103
235 236 237 238 239
1.931295e-104 1.329811e-105 8.977629e-107 5.941078e-108 3.853001e-109
240 241 242 243 244
2.448261e-110 1.523814e-111 9.287708e-113 5.542048e-114 3.236647e-115
245 246 247 248 249
1.849512e-116 1.033772e-117 5.650172e-119 3.018741e-120 1.576050e-121
250 251 252 253 254
8.037853e-123 4.002915e-124 1.945861e-125 9.229382e-127 4.269497e-128
255 256 257 258 259
1.925460e-129 8.461492e-131 3.621650e-132 1.509021e-133 6.117653e-135
260 261 262 263 264
2.411767e-136 9.240487e-138 3.438731e-139 1.242127e-140 4.352150e-142
265 266 267 268 269
1.478089e-143 4.862133e-145 1.547870e-146 4.764899e-148 1.417070e-149
270 271 272 273 274
4.067517e-151 1.125696e-152 3.000478e-154 7.693533e-156 1.895305e-157
275 276 277 278 279
4.479811e-159 1.014450e-160 2.197364e-162 4.544909e-164 8.959497e-166
280 281 282 283 284
1.679906e-167 2.989156e-169 5.034926e-171 8.006066e-173 1.198091e-174
285 286 287 288 289
1.681531e-176 2.204805e-178 2.688786e-180 3.034221e-182 3.149710e-184
290 291 292 293 294
2.986794e-186 2.565974e-188 1.977206e-190 1.349629e-192 8.033504e-195
295 296 297 298 299
4.084833e-197 1.725014e-199 5.808127e-202 1.461777e-204 2.444443e-207
300
2.037036e-210
plot(k,probs, type="h", ylim=c(0,max(probs)))
plot(k,probs, type="s", ylim=c(0,max(probs)))
plot(k,probs, type="h", ylim=c(0,max(probs)), xlim=c(40,80))
Example 7
For a fixed \(p\), as the number of trials in a binominal experiment increases, the probability distribution of the random variable X becomes bell shaped.
<- function(n, p, ...)
hist.binom
{# plots a Binomial probability distribution
<- 0:n
k <- dbinom(k, n, p)
p names(p) <- as.character(0:n)
barplot(p,space=0, ...)
}
Example 8
According to a study 86% of the countries households own a cellular phone (one or more). In a simple random sample of 300 households, 275 owned at least one cell phone. Is this result unusual?
<- 300
n <- 0.86
p
dbinom(275,300,0.86)
[1] 0.0008531712
The answer cannot be found by simply calculating the probability for a particular value to occur! Examine the probabilities of all possible outcomes:
<- c(0,seq(1:300))
sampleSpace <- dbinom(sampleSpace,300,0.86)
sampleSpaceProbs sampleSpaceProbs
[1] 6.893038e-257 1.270289e-253 1.166579e-250 7.118351e-248 3.246731e-245
[6] 1.180697e-242 3.565986e-240 9.200243e-238 2.069890e-235 4.125324e-233
[11] 7.374312e-231 1.194255e-228 1.766787e-226 2.404384e-224 3.027806e-222
[16] 3.546282e-220 3.880329e-218 3.982065e-216 3.845853e-214 3.506377e-212
[21] 3.026254e-210 2.478646e-208 1.930930e-206 1.433685e-204 1.016466e-202
[26] 6.893381e-201 4.478804e-199 2.792025e-197 1.672223e-195 9.634643e-194
[31] 5.346309e-192 2.860399e-190 1.477064e-188 7.368694e-187 3.554621e-185
[36] 1.659500e-183 7.503970e-182 3.288999e-180 1.398319e-178 5.770499e-177
[41] 2.312940e-175 9.009988e-174 3.413069e-172 1.257960e-170 4.513544e-169
[46] 1.577304e-167 5.371162e-166 1.783095e-164 5.773302e-163 1.823892e-161
[51] 5.624362e-160 1.693610e-158 4.981727e-157 1.431944e-155 4.023461e-154
[56] 1.105459e-152 2.970921e-151 7.812256e-150 2.010598e-148 5.065927e-147
[61] 1.249957e-145 3.020974e-144 7.153582e-143 1.660085e-141 3.776323e-140
[66] 8.422446e-139 1.842182e-137 3.952247e-136 8.318815e-135 1.718188e-133
[71] 3.483013e-132 6.930986e-131 1.354155e-129 2.598070e-128 4.895707e-127
[76] 9.062187e-126 1.648057e-124 2.945100e-123 5.172265e-122 8.928470e-121
[81] 1.515129e-119 2.527888e-118 4.147233e-117 6.691251e-116 1.061838e-114
[86] 1.657538e-113 2.545504e-112 3.846261e-111 5.718816e-110 8.368014e-109
[91] 1.205127e-107 1.708367e-106 2.384020e-105 3.275373e-104 4.430713e-103
[96] 5.901843e-102 7.741778e-101 1.000160e-99 1.272653e-98 1.595132e-97
[101] 1.969532e-96 2.395753e-95 2.871213e-94 3.390500e-93 3.945182e-92
[106] 4.523809e-91 5.112148e-90 5.693663e-89 6.250226e-88 6.763023e-87
[111] 7.213599e-86 7.584955e-85 7.862619e-84 8.035576e-83 8.097001e-82
[116] 8.044698e-81 7.881228e-80 7.613709e-79 7.253302e-78 6.814447e-77
[121] 6.313909e-76 5.769735e-75 5.200194e-74 4.622797e-73 4.053469e-72
[126] 3.505903e-71 2.991147e-70 2.517409e-69 2.090067e-68 1.711865e-67
[131] 1.383224e-66 1.102658e-65 8.672091e-65 6.729021e-64 5.151503e-63
[136] 3.891157e-62 2.899975e-61 2.132494e-60 1.547273e-59 1.107739e-58
[141] 7.825385e-58 5.454777e-57 3.751943e-56 2.546523e-55 1.705514e-54
[146] 1.127151e-53 7.350747e-53 4.730481e-52 3.004038e-51 1.882492e-50
[151] 1.164097e-49 7.103526e-49 4.277471e-48 2.541720e-47 1.490372e-46
[156] 8.623556e-46 4.923798e-45 2.774172e-44 1.542350e-43 8.461444e-43
[161] 4.580512e-42 2.446733e-41 1.289605e-40 6.706851e-40 3.441643e-39
[166] 1.742574e-38 8.705371e-38 4.290877e-37 2.086694e-36 1.001190e-35
[171] 4.739246e-35 2.213232e-34 1.019668e-33 4.634393e-33 2.077867e-32
[176] 9.190109e-32 4.009483e-31 1.725469e-30 7.324244e-30 3.066479e-29
[181] 1.266261e-28 5.156991e-28 2.071297e-27 8.204341e-27 3.204661e-26
[186] 1.234351e-25 4.688070e-25 1.755609e-24 6.482146e-24 2.359638e-23
[191] 8.468085e-23 2.995815e-22 1.044746e-21 3.591265e-21 1.216746e-20
[196] 4.062952e-20 1.337043e-19 4.335935e-19 1.385560e-18 4.362574e-18
[201] 1.353333e-17 4.135985e-17 1.245183e-16 3.692611e-16 1.078563e-15
[206] 3.102654e-15 8.789419e-15 2.451817e-14 6.734078e-14 1.820917e-13
[211] 4.847107e-13 1.270027e-12 3.275199e-12 8.312107e-12 2.075807e-11
[216] 5.100555e-11 1.232971e-10 2.931858e-10 6.857013e-10 1.577158e-09
[221] 3.567039e-09 7.931878e-09 1.733886e-08 3.725467e-08 7.866723e-08
[226] 1.632283e-07 3.327504e-07 6.663384e-07 1.310549e-06 2.531166e-06
[231] 4.799783e-06 8.934662e-06 1.632336e-05 2.926396e-05 5.147098e-05
[236] 8.879917e-05 1.502383e-04 2.492199e-04 4.052441e-04 6.457745e-04
[241] 1.008254e-03 1.541965e-03 2.309307e-03 3.385897e-03 4.858802e-03
[246] 6.822155e-03 9.369568e-03 1.258307e-02 1.651891e-02 2.119121e-02
[251] 2.655562e-02 3.249549e-02 3.881406e-02 4.523558e-02 5.141794e-02
[256] 5.697742e-02 6.152417e-02 6.470468e-02 6.624527e-02 6.598950e-02
[261] 6.392279e-02 6.017909e-02 5.502745e-02 4.884022e-02 4.204805e-02
[266] 3.508915e-02 2.836153e-02 2.218543e-02 1.678098e-02 1.226268e-02
[271] 8.648759e-03 5.881339e-03 3.851906e-03 2.426842e-03 1.469011e-03
[276] 8.531712e-04 4.747200e-04 2.526617e-04 1.284082e-04 6.219877e-05
[281] 2.865586e-05 1.252874e-05 5.185402e-06 2.025998e-06 7.449720e-07
[286] 2.569126e-07 8.277155e-08 2.480263e-08 6.877315e-09 1.754174e-09
[291] 4.087313e-10 8.628102e-11 1.633599e-11 2.739922e-12 4.007368e-13
[296] 5.006785e-14 5.195264e-15 4.298150e-16 2.658012e-17 1.092160e-18
[301] 2.236327e-20
How many outcomes have a corresponding probability greater than \(0.05\)?
<- 1
i while(i < 301)
if (sampleSpaceProbs[i] >= 0.05)
{
{print(
paste(as.character(sampleSpace[i]),
" ",
as.character(sampleSpaceProbs[i])
)
);
}<- i+1;
i }
[1] "254 0.051417944661263"
[1] "255 0.0569774199103519"
[1] "256 0.0615241671465016"
[1] "257 0.0647046827355092"
[1] "258 0.066245270419688"
[1] "259 0.0659894971748243"
[1] "260 0.063922793142426"
[1] "261 0.0601790937082499"
[1] "262 0.0550274482817531"
==> only 9 values have probabilities slightly higher than 5%
With an increasing number of possible outcomes, the probability of any particular outcome to occur will decrease!
Try this:
<- c(0,seq(1:3000))
sampleSpace <- dbinom(sampleSpace,3000,0.86)
sampleSpaceProbs
<- 1
i while(i < 3001)
if (sampleSpaceProbs[i] >= 0.05)
{
{print(
paste(as.character(sampleSpace[i]),
" ",
as.character(sampleSpaceProbs[i])
)
);
}<- i+1;
i }
==> none of the values has a probability of at least 5% to occur
The approach: check for the range of values around the expected value that add up to 95% of all possible occurrences. Values outside that range are considered unusual.
The Empirical Rule states that in a bell-shaped distribution about 95% of all observations lie between ?? - 2?? and ?? + 2??. Any observation that lies outside this intervall occurs less than 5% of the time and must be considered unusual.
Can we assume that the given binimial distribution is bell-shaped \(n*p*(1-p)>9\)? ==> TRUE ==> It’s bell-shaped distribution. Thus we can apply the Empirical Rule!
–> Expected Value or Mean � of the given binomial dist.
<- n*p expV_Cellphones
–> Standard Deviation ?? of the given binomial dist.
<- sqrt(n*p*(1-p)) sd_Cellphones
Is 275 smaller than ?? - 2??
- 2*sd_Cellphones expV_Cellphones
[1] 245.98
275 < expV_Cellphones - 2*sd_Cellphones
Is 275 greater than ?? - 2??
+ 2*sd_Cellphones expV_Cellphones
[1] 270.02
275 > expV_Cellphones + 2*sd_Cellphones
==> unusual result
Visualization of the result
The hist()
function does not work here, since we do not have counts but probabilities/relative frequencies we can create a barplot instead, that looks like a histogram
barplot(dbinom(229:290,n,p), space = 0, width = 1, names.arg = as.character(229:290))
We create our own hist function for binomial probability distributions
<- function(n, p, ...)
hist.binom
{# plots a Binomial probability distribution
<- 0:n
k <- dbinom(k, n, p)
p names(p) <- as.character(0:n)
barplot(p,space=0, ...)
}
Example 8b
According to the American Red Cross, 7% of people in the United States have blood type 0-negative. What is the probability that in a single random sample of 500 people in the US fewer than 30 have blood type 0-negative?
<- 500
n <- 0.07
p
pbinom(29,n,p)
[1] 0.1677677
The answer can also be approximated using the normal dist. if the given binomial dist. approx. the shape of a normal dist.
Can we assume that the given binimial distribution is normal \(n*p*(1-p)>9\)? ==> TRUE ==> approx. normal distribution
Determine the necessary parameters for the normal dist. –> Expected Value or Mean � of the given binomial dist.
<- n*p expV_bloodType
–> Standard Deviation ?? of the given binomial dist.
<- sqrt(n*p*(1-p)) sd_bloodType
Probability that fewer than 30 have that blood type in a sample of 500
pnorm(29.5, expV_bloodType, sd_bloodType)
[1] 0.1675173
Visualisation of the result
The hist()
function does not work here, since we do not have counts but probabilities/relative frequencies we can create a barplot instead, that looks like a histogram
<- barplot(dbinom(15:55,n,p), space = 0, width = 1,
bp #names.arg = as.character(15:55),
col="yellow", main="Binom. Dist. (n=500, p=0.07) with a superimposed normal curve",
axes=FALSE, xlab="No of people with blood typ 0-negative", ylab="Prob. Density")
axis(1, labels = c(15:55), at = bp, tcl = -0.25)
axis(2)
bp
[,1]
[1,] 0.5
[2,] 1.5
[3,] 2.5
[4,] 3.5
[5,] 4.5
[6,] 5.5
[7,] 6.5
[8,] 7.5
[9,] 8.5
[10,] 9.5
[11,] 10.5
[12,] 11.5
[13,] 12.5
[14,] 13.5
[15,] 14.5
[16,] 15.5
[17,] 16.5
[18,] 17.5
[19,] 18.5
[20,] 19.5
[21,] 20.5
[22,] 21.5
[23,] 22.5
[24,] 23.5
[25,] 24.5
[26,] 25.5
[27,] 26.5
[28,] 27.5
[29,] 28.5
[30,] 29.5
[31,] 30.5
[32,] 31.5
[33,] 32.5
[34,] 33.5
[35,] 34.5
[36,] 35.5
[37,] 36.5
[38,] 37.5
[39,] 38.5
[40,] 39.5
[41,] 40.5
dbinom(15:55,n,p)
[1] 0.0000464146 0.0001058989 0.0002269358 0.0004583453 0.0008751878
[6] 0.0015842782 0.0027256399 0.0044667989 0.0069873349 0.0104528276
[11] 0.0149801382 0.0205992389 0.0272195439 0.0346097964 0.0423992463
[16] 0.0501040555 0.0571773648 0.0630757018 0.0673300747 0.0696084801
[21] 0.0697581758 0.0678204487 0.0640165118 0.0587089742 0.0523476213
[26] 0.0454101543 0.0383479404 0.0315442736 0.0252890575 0.0197702090
[31] 0.0150792131 0.0112265764 0.0081624479 0.0057981905 0.0040257790
[36] 0.0027332009 0.0018152188 0.0011797421 0.0007505927 0.0004676632
[41] 0.0002854437
# color the interesting area differently
barplot( dbinom(15:29, n, p),
space=0,
col="skyblue",
add=TRUE)
# add a vertical line for x = 29
abline(v = 14.5, col = "red", lty = 1)
text(x=12.5, y=0.06,
pos=4, paste("29"),cex=0.75, col = "red")
<- xy.coords(x=0.5:39.5, y= dnorm(15:54,expV_bloodType,sd_bloodType))
xy lines(xy,
lwd=2, col=2)
Example 9
Create a plot for a uniform distribution
<- seq(-1, 32, length=4000)
xUnif <- dunif(xUnif,min=0, max=30)
hxUnif plot(xUnif, hxUnif, type="l", lty=1, xlab="x value",xlim = range(-2,32),
ylab="Density",ylim = range(0,0.04), main="Uniform Distribution",col="blue")
Example 10
Create a uniform population and simulate 1000 samples of size 3, 12 and 30 and calculate the sampling means.
# Create a uniform population with 10000 individuals
<- runif(10000,min=0, max=30)
dataUnif <- hist(dataUnif, breaks=16, xlab="x",
n_Hist main="Uniform Distribution of x", freq=F, col= "grey",
xlim = range(-2,32))
<- 1000 # number of samples
N <- 3 # sample size n1
n1 <- 12 # sample size n2
n2 <- 30 # sample size n3
n3
<- NULL
vec_n1 <- NULL
vec_n2 <- NULL
vec_n3
while (N > 0) {
<-N-1;
N<-sample(dataUnif, n1);
samp_n1<-sample(dataUnif, n2);
samp_n2<-sample(dataUnif, n3);
samp_n3
<-c(vec_n1,mean(samp_n1));
vec_n1<-c(vec_n2,mean(samp_n2));
vec_n2<-c(vec_n3,mean(samp_n3));
vec_n3 }
Create frequency histograms for the sampling means.
Version 1: A diagram with a plot of x in the population is added
par(mfrow=c(2,2), font.main= 1, cex.main=1) # Splits graphics output into a table of 2 rows and 2 columns
plot(xUnif, hxUnif, type="l", lty=1, xlab="x value",xlim = range(-2,32),
ylab="Density",ylim = range(0,0.04), main="Uniform Distribution",col="blue")
<- hist(vec_n1, breaks=16, xlab="Sample Means",
n1_Hist main="Distribution of the sample means with n = 3", freq=F, col= "grey",
xlim = range(-2,32))
<- hist(vec_n2, breaks=16, xlab="Sample Means",
n2_Hist main="Distribution of the sample means with n = 12", freq=F, col= "grey",
xlim = range(-2,32))
<- hist(vec_n3, breaks=16, xlab="Sample Means",
n3_Hist main="Distribution of the sample means with n = 30", freq=F, col= "grey",
xlim = range(-2,32))
par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table
Version 2: A histogram of the population data of x is added
par(mfrow=c(2,2), font.main= 1, cex.main=1) # Splits graphics output into a table of 2 rows and 2 columns
<- hist(dataUnif, breaks=16, xlab="x",
n_Hist main="Uniform Distribution of X", freq=F, col= "grey",
xlim = range(-2,32))
<- hist(vec_n1, breaks=16, xlab="Sample Means",
n1_Hist main="Distribution of the sample means with n = 3", freq=F, col= "grey",
xlim = range(-2,32))
<- hist(vec_n2, breaks=16, xlab="Sample Means",
n2_Hist main="Distribution of the sample means with n = 12", freq=F, col= "grey",
xlim = range(-2,32))
<- hist(vec_n3, breaks=16, xlab="Sample Means",
n3_Hist main="Distribution of the sample means with n = 30", freq=F, col= "grey",
xlim = range(-2,32))
par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table
Example 11
Create histograms of 1000 sampling means from samples with \(n = 9\) and \(n = 25\) that are taken from a population that is not normal. Make sure that each is appropriately labelled. Determine the mean and the sd of the frequency dist.
#Generate some non-normally distributed data (f distribution, Fisher-Dist.).
<- rf(10000,10,10)
population hist(population)
# Simulate 1000 samples of size 4, 10 and 35 and calculate the sampling means of scores that are not normally distr.
<- 1000
N <- 4
n1 <- 10
n2 <- 35
n3 <- NULL
vec_n1 <- NULL
vec_n2 <- NULL
vec_n3
while (N>0)
{<-N-1;
N<-rf(n1,3,27);
samp_n1<-rf(n2,3,27);
samp_n2<-rf(n3,3,27);
samp_n3
<-c(vec_n1,mean(samp_n1));
vec_n1<-c(vec_n2,mean(samp_n2));
vec_n2<-c(vec_n3,mean(samp_n3));
vec_n3
}
par(mfrow=c(2,2), font.main= 1, cex.main=1) # Splits graphics output into a table of 2 rows and 2 columns
<- hist(population, breaks=16,
pop_Hist xlab="Scores from a Fisher Distribution (Population)",
xlim = range(0 ,6),
main="Fisher Distribution",
freq = F )
<- hist(vec_n1, breaks=16, ylab="", xlab="Sample Means",
n1_Hist xlim = range(0,5),main="Distribution of the sample means with n = 4", freq=F)
<- hist(vec_n2, breaks=16, ylab="", xlab="Sample Means",
n2_Hist xlim = range(0,4),main="Distribution of the sample means with n = 10", freq=F)
<- hist(vec_n3, breaks=16, ylab="", xlab="Sample Means",
n3_Hist xlim = range(0, 3),ylim=range(0,2.7),
main="Distribution of the sample means with n = 35", freq=F)
par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table
Example 12
Simulate 1000 samples of size 9 and 25 and calculate the sampling means of normally distributed IQ Scores (\(�=100\), \(Sigma=15\))
<- 1000
N <- 9
n1 <- 25
n2 <- NULL
vec_n1 <- NULL
vec_n2
while (N>0)
{<-N-1;
N<-rnorm(n1, mean=100, sd=15);
samp_n1<-rnorm(n2, mean=100, sd=15);
samp_n2
<-c(vec_n1,mean(samp_n1));
vec_n1<-c(vec_n2,mean(samp_n2));
vec_n2
}
par(mfrow=c(1,2)) # Splits graphics output into a table of 3 rows and 2 columns
hist(vec_n1, breaks=16, xlab="Sample Means, n=9",
xlim = range(80,120),main="")
hist(vec_n1, breaks=16, xlab="Sample Means, n=25",
xlim = range(80,120),main="")
par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table
Example 13
Create histograms for a normally distributed variable (IQ Scores) and the means of 1000 simulated sample with \(n = 9\). Make sure that each is appropriately labelled. Determine the mean and the sd of the frequency dist.
# simulate a population of size 10000
<-rnorm(10000, mean=100, sd=15);
population
par(mfrow=c(1,2)) # Splits graphics output into a table of 3 rows and 2 columns
<- hist(population, breaks=16, xlab="IQ Scores (Population)",
pop_Hist xlim = range(40,160),ylim=c(0,2800),main="")
<- round(mean(population), digits=2)
mean_pop <- round(sd(population), digits=2)
sd_pop text(c(70),c(2750),paste("mean=",mean_pop),cex=0.75)
text(c(68),c(2600),paste("sd=",sd_pop),cex=0.75)
<- hist(vec_n1, breaks=16, xlab="Sample Means, n=9",
n1_Hist xlim = range(40,160), ylim=c(0,180),main="")
<- round(mean(vec_n1), digits=2)
mean_n1 <- round(sd(vec_n1), digits=2)
sd_n1
# if we would want to superimpose a normalcurve (density vs. random variable)
# we first would have to change our histograms to freq=FALSE)
# otherwise we are going to get a line that looks like it is attached to the
# very bottom of the bars
#lines(n1_Hist$mids, dnorm(n1_Hist$mids,mean_n1,sd_n1), lwd=2, col=3)
text(c(72),c(175),paste("mean=",mean_n1),cex=0.75)
text(c(70),c(165),paste("sd=",sd_n1),cex=0.75)
par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table
Example 14
Create histograms with superimposed density curves for a normaly distributed variable (IQ Scores) and the means of 1000 simulated sample with n= 9. Make sure that each is appropriately labelled. Determine the mean and the sd of the frequency dist.
# Version 2 with densities and superposed normal curves
<-rnorm(10000, mean=100, sd=15);
population
par(mfrow=c(1,2)) # Splits graphics output into a table of 3 rows and 2 columns
if we want to superimpose a normalcurve (density vs. random variable), we have to change our histograms to freq=FALSE otherwise we are going to get a line that looks like it is attached to the very bottom of the bars
<- hist(population, breaks=16, xlab="IQ Scores (Population)",xlim = range(40,160),ylim=c(0, 0.028), main="", freq=F)
pop_Hist lines(pop_Hist$mids, dnorm(pop_Hist$mids,100,15), lwd=2, col=2)
<- round(mean(population), digits=2)
mean_pop <- round(sd(population), digits=2)
sd_pop text(c(68),c(0.028),paste("mean=",mean_pop),cex=0.75)
text(c(66),c(0.026),paste("sd=",sd_pop),cex=0.75)
<- hist(vec_n1, breaks=16, xlab="Sample Means, n=9",xlim = range(40,160),ylim=c(0,0.09), main="", freq=F)
n1_Hist lines(pop_Hist$mids, dnorm(pop_Hist$mids,100,15), lwd=2, col=2)
<- round(mean(vec_n1), digits=2)
mean_n1 <- round(sd(vec_n1), digits=2)
sd_n1 lines(n1_Hist$mids, dnorm(n1_Hist$mids,mean_n1,sd_n1), lwd=2, col=3)
text(c(68),c(0.09),paste("mean=",mean_n1),cex=0.75)
text(c(66),c(0.083),paste("sd=",sd_n1),cex=0.75)
par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table
Example 15
Create histograms of 1000 sampling means from samples with \(n = 9\) and \(n = 25\) and a normally distributed population. Make sure that each is appropriately labelled. Determine the mean and the sd of the frequency dist.
par(mfrow=c(1,2)) # Splits graphics output into a table of 3 rows and 2 columns
<- hist(vec_n1, breaks=16, freq=FALSE, xlab="n=9",xlim = range(70,120),main="")
n1_Hist <- round(mean(vec_n1), digits=2)
mean_n1 <- round(sd(vec_n1), digits=2)
sd_n1 text(c(80),c(0.08),paste("mean=",mean_n1),cex=0.75)
text(c(80),c(0.073),paste("sd=",sd_n1),cex=0.75)
lines(n1_Hist$mids, dnorm(n1_Hist$mids,mean_n1,sd_n1), lwd=2, col=2)
<- hist(vec_n2, breaks=16, freq=FALSE, ylab="", xlab="n=25",xlim = range(70,120),main="")
n2_Hist <- round(mean(vec_n2), digits=2)
mean_n2 <- round(sd(vec_n2), digits=2)
sd_n2 text(c(82),c(0.11),paste("mean=",mean_n2),cex=0.75)
text(c(80),c(0.1),paste("sd=",sd_n2),cex=0.75)
lines(n2_Hist$mids, dnorm(n2_Hist$mids,mean_n2,sd_n2), lwd=2, col=3)
lines(n1_Hist$mids, dnorm(n1_Hist$mids,mean_n1,sd_n1), lwd=2, col=2)
# Text is written in one of the four margins of the current figure region or ...
mtext("Hist. of the Sampling Dist. of the Mean",side=3, line=2, outer=F,col=2,cex=1.5,adj = 1.5)
par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table
Example 16
Create histograms of 1000 sampling means from samples with n= 9 and n=25 and a population that is not normaly distributed. Make sure that each is appropriately labelled. Determine the mean and the sd of the frequency dist.
#Generate some non-normally distributed data (f distribution, Fisher-Dist.).
<- rf(10000,3,27)
population hist(population)
par(mfrow=c(1,4)) # Splits graphics output into a table of 3 rows and 2 columns
<- hist(population, breaks=16,
pop_Hist xlab="Scores from a Fisher Distribution (Population)",
xlim = range(0 ,9),
main="",
freq = F )
lines(pop_Hist$mids, df(pop_Hist$mids,3,27), lwd=2, col=2)
<- round(mean(population), digits=2)
mean_pop <- round(sd(population), digits=2)
sd_pop text(c(6),c(0.3),paste("mean=",mean_pop),cex=0.75)
text(c(6),c(0.26),paste("sd=",sd_pop),cex=0.75)
# Simulate 1000 samples of size 4, 10 and 35 and calculate the sampling means of scores that are not normally distr.
<- 1000
N <- 4
n1 <- 10
n2 <- 35
n3 <- NULL
vec_n1 <- NULL
vec_n2 <- NULL
vec_n3
while (N>0)
{<-N-1;
N<-rf(n1,3,27);
samp_n1<-rf(n2,3,27);
samp_n2<-rf(n3,3,27);
samp_n3
<-c(vec_n1,mean(samp_n1));
vec_n1<-c(vec_n2,mean(samp_n2));
vec_n2<-c(vec_n3,mean(samp_n3));
vec_n3
}
<- hist(vec_n1, breaks=16, ylab="", xlab="Sample Means, n=4",xlim = range(0,3),main="", freq=F)
n1_Hist <- round(mean(vec_n1), digits=2)
mean_n1 <- round(sd(vec_n1), digits=2)
sd_n1 text(c(2.3),c(0.4),paste("mean=",mean_n1),cex=0.75)
text(c(2.3),c(0.3),paste("sd=",sd_n1),cex=0.75)
<- hist(vec_n2, breaks=16, ylab="", xlab="Sample Means, n=10",xlim = range(0,3),main="", freq=F)
n2_Hist <- round(mean(vec_n2), digits=2)
mean_n2 <- round(sd(vec_n2), digits=2)
sd_n2 text(c(2.3),c(0.4),paste("mean=",mean_n2),cex=0.75)
text(c(2.3),c(0.3),paste("sd=",sd_n2),cex=0.75)
<- hist(vec_n3, breaks=16, ylab="", xlab="Sample Means, n=35",xlim = range(0.5,2),ylim=range(0,2.7) , main="", freq=F)
n3_Hist <- round(mean(vec_n3), digits=2)
mean_n3 <- round(sd(vec_n3), digits=2)
sd_n3 text(c(1.7),c(1.4),paste("mean=",mean_n3),cex=0.75)
text(c(1.7),c(1.3),paste("sd=",sd_n3),cex=0.75)
# create theoretical scores to draw a normal curve into the plot
<- seq(-4, 4, length=10000)
temp_x lines(temp_x, dnorm(temp_x,mean_n3,sd_n3), lwd=2, col=3)
# Text is written in one of the four margins of the current figure region or ...
mtext("Population Scores Following a Fisher Distribution \r with 3 sampling Distributions of the Mean (n=4, n=10 and n=35)",
side=3, line=2, outer=F,col=2,cex=0.75,adj = 1)
par(mfrow=c(1,1)) # This line resets the graphics device to a 1x1 table
Example 17
Display the Student’s t distributions with various degrees of freedom and compare to the normal distribution
<- seq(-4, 4, length=100)
x <- dnorm(x)
hx
<- c(1, 3, 8, 30)
degf <- c("red", "blue", "darkgreen", "gold", "black")
colors <- c("df=1", "df=3", "df=8", "df=30", "normal")
labels
plot(x, hx, type="l", lty=2, xlab="x value",
ylab="Density", main="Comparison of t Distributions")
for (i in 1:4){
lines(x, dt(x,degf[i]), lwd=2, col=colors[i])
}
legend("topright", inset=.05, title="Distributions",
lwd=2, lty=c(1, 1, 1, 1, 2), col=colors) labels,
Example 18
Assesing Normality in R
#Generate some normally distributed data.
<- rnorm(1000,17,3)
y hist(y)
qqnorm(y)
qqline(y, col="green")
#Perform the Shapiro-Wilks test for normality
shapiro.test(y)
Shapiro-Wilk normality test
data: y
W = 0.99807, p-value = 0.311
#Perform Anderson-Darling test for normality
library(nortest)
ad.test(y)
Anderson-Darling normality test
data: y
A = 0.49311, p-value = 0.2165
#Generate some non-normally distributed data (f distribution, Fisher-Dist.).
<- rf(1000,3,27)
y hist(y)
qqnorm(y)
qqline(y, col="green")
#Perform the Shapiro-Wilks test for normality
shapiro.test(y)
Shapiro-Wilk normality test
data: y
W = 0.82243, p-value < 2.2e-16
#Perform Anderson-Darling test for normality
library(nortest)
ad.test(y)
Anderson-Darling normality test
data: y
A = 36.413, p-value < 2.2e-16
## The following data represent the finishing times
# (in seconds) for six randomly selected races of a
# greyhound named Barbies Bomber.
# Is there evidence to support the belief that the
# variable "finishing time" is normally distributed?
<- c(31.35,32.06,31.91,32.52,31.26,32.37)
finishTimes qqnorm(finishTimes)
qqline(finishTimes, col="green")
Example 19
1 - Construct a Poisson probability histogram with lambda=1. 2 - Construct a Poisson probability histogram with lambda=10. 3 - Construct a Poisson probability histogram with lambda=30.
To use the hist.pois()
function, you must specify the values of lambda (l, the average number of events/observations/… per unit of time/space/…) and the maximum number of counts per unit you want to be considered. For example, to get the histogram of a poisson(lambda=1) distribution, use hist.pois(1,15)
<- function(l, maxCounts,...)
hist.pois
{# plots a Poisson probability distriburion distribution
<- 0:maxCounts
k <- dpois(k,l)
p names(p) <- as.character(0:maxCounts)
barplot(p,space=0, ...)
}
par(mfrow=c(1,3)) # Splits graphics output into a table of 3 columns
hist.pois(1,15,col="yellow", main="Poisson Dist. with lambda=1")
hist.pois(10,20,col="yellow", main="Poisson Dist. with lambda=10")
hist.pois(30,60,col="yellow", main="Poisson Dist. with lambda=30")
par(mfrow=c(1,1)) # back to default
Example 20
Construct a standard normal probability distribution and plot its density function
<- seq(-4, 4, length = 1000)
x <- dnorm(x)
y <- -1
a <- pnorm(a)
a1 plot(x, y, axes = FALSE, type = 'l', xlab = '', ylab = '',
main = expression(italic(P(X<=a))));
abline(h = 0)
<- x[x <= a]
x1 <- dnorm(x1)
y1 <- c(-4, x1, x1[length(x1)], -4)
xcoord <- c(0, y1, 0, 0)
ycoord polygon(xcoord, ycoord, col = 'grey90')
axis(1, at = c(-1), font = 8,labels = c('a'))
<- paste("P(X<=a) = \r", as.character(round(a1, digits=4)) )
labText text(-2.5,0.2, labels = labText)
Example 21
Construct a uniform probability distribution and plot its density function
<- seq(0, 10, length = 1000)
x <- dunif(x,0,10)
y plot(x, y, axes = TRUE, xaxt="n", type = 'l', xlab = 'X', ylab = 'Density', ylim=c(0,0.15),
main = "Uniform Distribution", bty="n")
#abline(v = 0)
<- list(x=c(0,0),y=c(0,0.1))
pts lines(pts)
lines(list(x=c(10,10),y=c(0,0.1)))
axis(1,pos=c(0,0))
Simulate 1000 samples of size 3, 12 and 30 and calculate the sampling means of scores that are not normally distr.
<- 1000
N <- 3
n1 <- 12
n2 <- 30
n3 <- 1
n4
<- NULL
vec_n1 <- NULL
vec_n2 <- NULL
vec_n3 <- NULL
vec_n4
while (N>0)
{<-N-1;
N<-sample(x, n1);
samp_n1<-sample(x, n2);
samp_n2<-sample(x, n3);
samp_n3<-sample(x, n4);
samp_n4
<-c(vec_n1,mean(samp_n1));
vec_n1<-c(vec_n2,mean(samp_n2));
vec_n2<-c(vec_n3,mean(samp_n3));
vec_n3<-c(vec_n4,mean(samp_n4));
vec_n4
}
<- hist(vec_n1, breaks=16, xlab="Sample Means",main="Distribution of the sample mean with n = 3", freq=F, col= "grey") n1_Hist
<- hist(vec_n2, breaks=16, xlab="Sample Means",main="Distribution of the sample mean with n = 12", freq=F, col= "grey") n2_Hist
<- hist(vec_n3, breaks=16, xlab="Sample Means",main="Distribution of the sample mean with n = 30", freq=F, col= "grey") n3_Hist
<- hist(vec_n4, breaks=16, xlab="Sample Means",main="Distribution of the sample mean with n = 1", freq=F, col= "grey") n4_Hist
#Setup up coordinate system (with x == y aspect ratio):
plot(c(-2,3), c(-1,5), type = "n", xlab = "x", ylab = "y", asp = 1)
#the x- and y-axis, and an integer grid
abline(h = 0, v = 0, col = "gray60")
text(1,0, "abline( h = 0 )", col = "gray60", adj = c(0, -.1))
abline(h = -1:5, v = -2:3, col = "lightgray", lty = 3)
abline(a = 1, b = 2, col = 2)
text(1,3, "abline( 1, 2 )", col = 2, adj = c(-.1, -.1))
<- list(x = cars[,1], y = cars[,2])
pts plot(pts)
Example 22
Visualization the mean weight gain during pregnancy is 30 pounds, with a standard deviation of 12.9 pounds. Weight gain during pregnancy is skewed right. In a certain neighborhood the mean of a sample with 35 persons is 36.2. Is this result unusual?
With a sample size n > 29 we can assume that the sampling distribution of the sample mean is normal with: - mean of the sampling distr.= mean of the population = 30 - SD of the sampling distr.= SD of the populatio/sqrt(n)
Construct a standard normal probability distribution and plot its density function
<- 35
n <- 36.2
mean_of_Sample <- 30
mean_of_SampDist <- 12.9/sqrt(n)
sd_of_SampDist
# create a range of possible sample mean values
<- seq(19,41, length = 1000)
x # determine the corresponding probability densities
<- dnorm(x, mean=mean_of_SampDist, sd=sd_of_SampDist)
y
# dertermine the lower and upper bound (quantiles) that
# delimit the range of possible sample mean values that represent 95% of the possible sample mean values
<- mean_of_SampDist - 1.96 * sd_of_SampDist
l <- mean_of_SampDist + 1.96 * sd_of_SampDist
u
# plot the density curve without axis, titles, legends, ...
plot(x, y,
axes = FALSE,
type = 'l',
xlab = '',
ylab = '',
ylim = c(0, 0.22),
main = ''
);
# add a horizontal axis at probability density = 0
abline(h = 0)
# add a diagram title
text(mean_of_SampDist,
0.21,
# expression( paste("Sampling Distribution of the Sample Mean ",
# bar(x) ,
# ", with n = 35 --> " ,
# bar(x) ,
# " ~ N(" ,
# mu[bar(x)] == mu ,
# ", " ,
# sigma[bar(x)] == sigma/sqrt(n),
# ")"
# )
# )
expression( atop("Sampling Distribution of the Sample Mean "~bar(x)~"," ,
"with n = 35 --> "~bar(x)~" ~ N("~mu[bar(x)]==mu~", "~sigma[bar(x)]==sigma/sqrt(n)~")" )
)
)
# add a title for the axis
text(40, 0,
labels = expression( bar(x) ),
cex=1.0,
pos=3
)
# add a tic mark and label for the center of the distribution
axis(1,
at = c(mean_of_SampDist),
font = 8,
labels = expression( paste("", mu[bar(x)] == mu , "\n" , "= 30") ),
cex=0.3
)
# fill the central 95% under ther curve with blue color
<- x[x > l & x < u]
x3 <- dnorm(x3, mean=mean_of_SampDist, sd=sd_of_SampDist)
y3 <- c(l, x3, x3[length(x3)], l)
xcoord <- c(0, y3, 0, 0)
ycoord polygon(xcoord, ycoord, col = 'cornflowerblue')
# label the blue part
text(mean_of_SampDist,0.05,
labels = "95%",
cex=1.8,
col="white"
)
text(mean_of_SampDist,0.02,
labels = expression(paste("P(", mu[bar(x)] - 1.96 * sigma[bar(x)] <= bar(x), " and " , bar(x) <= mu[bar(x)] + 1.96 * sigma[bar(x)] , ") = " , 0.95 , "" )),
cex=0.8,
col="white"
)
<- pnorm(l, mean=mean_of_SampDist, sd=sd_of_SampDist)
a1 <- pnorm(u, mean=mean_of_SampDist, sd=sd_of_SampDist)
a2
# add a tic mark and label for the lower and upper bound of the central 95% of the distribution
axis(1,
at = c(l),
font = 8,
labels = expression( paste("", mu[bar(x)] - 1.96 * sigma[bar(x)] ,"") ),
cex=0.3
)axis(1,
at = c(u),
font = 8,
labels = expression( paste("", mu[bar(x)] + 1.96 * sigma[bar(x)] ,"") ),
cex=0.3
)
# add text that reports the probability for a mean to occur in the tails of the distribution
text(l,0.05,
labels = expression(paste("P(",bar(x) <= mu[bar(x)] - 1.96 * sigma[bar(x)] , ") = ", 0.025 , "" ) ),
pos=2,
cex=0.8)
text(u,0.05,
labels = expression(paste("P(",bar(x) >= mu[bar(x)] + 1.96 * sigma[bar(x)] , ") = ", 0.025 , "" )),
pos=4,
cex=0.8)
# fill the tails with grey color
<- x[x <= l]
x1 <- dnorm(x1, mean=mean_of_SampDist, sd=sd_of_SampDist)
y1 <- c(l, x1, x1[length(x1)], l)
xcoord <- c(0, y1, 0, 0)
ycoord polygon(xcoord, ycoord, col = 'grey90')
<- x[x >= u]
x2 <- dnorm(x2, mean=mean_of_SampDist, sd=sd_of_SampDist)
y2 <- c(u, x2, x2[length(x2)], u)
xcoord <- c(0, y2, 0, 0)
ycoord polygon(xcoord, ycoord, col = 'grey90')
# label the tails
text(l,0.005,
labels = "2,5%",
cex=1.0,
pos=2
)
text(u,0.005,
labels = "2,5%",
cex=1.0,
pos=4
)
# plot a line that represents the mean of the current sample
abline(v=36.2)
axis(1,
at = c(36.2),
font = 8,
cex=0.3,
col="red",
lty=1,
tcl= 2,
labels=""
)# add a lablel to the line that represents the mean of the current sample
text(36.2, 0.02,
labels = expression( bar(x)==36.2 ),
cex=1.0,
pos=4
)