Ch. 5, Coding Question 1
# Attach the plotting package
library(ggplot2)

# Fix the RNG seed so that the random draws are reproducible across runs
set.seed(1234)

# Draw 100 observations from an exponential distribution (rexp()'s default rate)
x <- rexp(100)

# Histogram of the draws, using the black-and-white theme
ggplot(data = data.frame(x = x), mapping = aes(x = x)) +
  geom_histogram() +
  theme_bw()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Ch. 5, Coding Question 2
# Attach the plotting package
library(ggplot2)

# Read the saved workspace file; the plot below expects it to provide
# an object named `fertilizer_2000`
load("fertilizer_2000.RData")

# Scatter plot of avyield (vertical axis) against avfert (horizontal axis)
ggplot(fertilizer_2000, aes(x = avfert, y = avyield)) +
  geom_point() +
  labs(x = "Fertilizer", y = "Crop Yield") +
  theme_bw()
Ch. 8, Coding Question 1
# Attach Ecdat and load its Airq data set into the workspace
library(Ecdat)
data("Airq", package = "Ecdat")

# a) sample mean of the rain column
ybar <- mean(Airq[["rain"]])
ybar
# b) standard error of the sample mean:
#    square root of the sample variance divided by the square root of n
n <- nrow(Airq)
V <- var(Airq[["rain"]])
se <- sqrt(V) / sqrt(n)
se
# c) t-statistic: distance of the sample mean from the hypothesized
#    value h0, measured in standard errors
h0 <- 25
t <- (ybar - h0) / se
t
Since \(|t| > 1.96\), we would reject \(H_0\) at the 5% significance level.
# d) two-sided p-value from the standard normal distribution:
#    twice the upper-tail probability beyond |t|
pval <- 2 * pnorm(abs(t), lower.tail = FALSE)
pval
There is virtually a 0 percent chance of getting a t-statistic this large in absolute value if the null hypothesis were true.
# e) confidence interval: sample mean plus/minus 1.96 standard errors,
#    shown as "[lower, upper]" with both bounds rounded to 3 decimals
ciL <- ybar - 1.96 * se
ciU <- ybar + 1.96 * se
sprintf("[%s, %s]", round(ciL, 3), round(ciU, 3))
# f) summary statistics of the Airq columns, split by the coas column
library(modelsummary)
datasummary_balance(~coas, data = Airq)
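|      | no (N=9): Mean | no (N=9): Std. Dev. | yes (N=21): Mean | yes (N=21): Std. Dev. | Diff. in Means | Std. Error |
|------|---------------:|--------------------:|-----------------:|----------------------:|---------------:|-----------:|
| airq | 125.3 | 10.5 | 95.9 | 28.7 | -29.5 | 7.2 |
| vala | 4118.2 | 5909.8 | 4218.6 | 4136.7 | 100.4 | 2166.9 |
| rain | 32.3 | 7.6 | 37.7 | 15.2 | 5.4 | 4.2 |
| dens | 1706.4 | 3014.6 | 1738.1 | 2821.2 | 31.7 | 1178.5 |
| medi | 6290.3 | 10065.4 | 10842.2 | 13396.8 | 4551.9 | 4450.1 |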