Homework 3 Solutions

Ch. 5, Coding Question 1

# Fix the RNG seed so the draws below are reproducible from run to run
set.seed(seed = 1234)
# 100 draws from an exponential distribution (rexp's default rate of 1)
x <- rexp(n = 100)

library(ggplot2)

# Histogram of the simulated draws in x, using ggplot2's default binning
ggplot(data = data.frame(x = x), mapping = aes(x = x)) +
  geom_histogram() +
  theme_bw()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Ch. 5, Coding Question 2

# Read the saved workspace file from the current working directory
# (presumably it provides the fertilizer_2000 data frame used below)
load(file = "fertilizer_2000.RData")

# ggplot2 supplies the plotting functions
library(ggplot2)

# Scatter plot of avyield (vertical axis) against avfert (horizontal axis)
ggplot(fertilizer_2000, aes(x = avfert, y = avyield)) +
  geom_point() +
  labs(x = "Fertilizer", y = "Crop Yield") +
  theme_bw()

Ch. 8, Coding Question 1

# Attach Ecdat and load its Airq data set into the workspace
library(Ecdat)
data(list = "Airq", package = "Ecdat")

# a) sample mean of the rain column
ybar <- mean(Airq[["rain"]])
ybar
[1] 36.078
# b) standard error of the sample mean:
#    square root of the sample variance divided by the square root of n
V <- var(Airq[["rain"]])
n <- nrow(Airq)
se <- sqrt(V) / sqrt(n)
se
[1] 2.462628
# c) t-statistic: distance between the sample mean and the hypothesized
#    mean h0, measured in standard errors
h0 <- 25
t <- (ybar - h0) / se
t
[1] 4.498446

Since \(|t| > 1.96\), we would reject \(H_0\) at the 5% significance level.

# d) two-sided p-value from the standard normal distribution:
#    twice the lower-tail probability at -|t|
pval <- 2 * pnorm(q = -abs(t))
pval
[1] 6.845183e-06

There is virtually a 0 percent chance of getting a t-statistic this large in absolute value if the null hypothesis were true.

# e) confidence interval: sample mean plus/minus 1.96 standard errors,
#    formatted as "[lower, upper]" with bounds rounded to 3 decimals
ciL <- ybar - 1.96 * se
ciU <- ybar + 1.96 * se
sprintf("[%s, %s]", round(ciL, 3), round(ciU, 3))
[1] "[31.251, 40.905]"
# f) summary statistics for the Airq variables, split by the coas column
library(modelsummary)
datasummary_balance(formula = ~coas, data = Airq)
no (N=9) yes (N=21)
Mean Std. Dev. Mean Std. Dev. Diff. in Means Std. Error
airq 125.3 10.5 95.9 28.7 -29.5 7.2
vala 4118.2 5909.8 4218.6 4136.7 100.4 2166.9
rain 32.3 7.6 37.7 15.2 5.4 4.2
dens 1706.4 3014.6 1738.1 2821.2 31.7 1178.5
medi 6290.3 10065.4 10842.2 13396.8 4551.9 4450.1