Homework 3 Solutions

Ch. 5, Coding Question 1

# fix the RNG seed so the draws below are reproducible from run to run
set.seed(1234)
# 100 draws from an exponential distribution (rate 1 is the rexp default)
x <- rexp(n = 100, rate = 1)

library(ggplot2)

# histogram of the simulated draws (default binning, black-and-white theme)
ggplot(data = data.frame(x = x), mapping = aes(x = x)) +
  geom_histogram() +
  theme_bw()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Ch. 5, Coding Question 2

# load packages
library(ggplot2)

# restore the objects saved in the .RData file from the working directory;
# the plot below expects this to provide `fertilizer_2000`
load("fertilizer_2000.RData")

# scatter plot of crop yield (avyield) against fertilizer use (avfert)
ggplot(fertilizer_2000, aes(x = avfert, y = avyield)) +
  geom_point() +
  labs(x = "Fertilizer", y = "Crop Yield") +
  theme_bw()

Ch. 8, Coding Question 1

# attach Ecdat and bring its Airq data set into the workspace
library(Ecdat)
data("Airq", package = "Ecdat")

# a) sample mean of the rain column, used as the estimate of mean rainfall
ybar <- mean(Airq[["rain"]])
ybar

[1] 36.078

# b) standard error of the sample mean: sample sd divided by sqrt(n)
n <- nrow(Airq)            # number of observations (rows of Airq)
V <- var(Airq[["rain"]])   # sample variance of rainfall
se <- sqrt(V) / sqrt(n)
se

[1] 2.462628

# c) t-statistic for the null hypothesis that mean rainfall equals h0
# (the variable `t` shares its name with base R's transpose function;
#  calls such as t(m) still resolve to the function)
h0 <- 25
t <- (ybar - h0) / se
t

[1] 4.498446

Since \(|t| > 1.96\), we would reject \(H_0\) at the 5% significance level.

# d) two-sided p-value from the standard normal distribution:
#    twice the lower-tail probability at -|t|
pval <- 2 * pnorm(-abs(t))
pval

[1] 6.845183e-06

There is virtually a 0 percent chance of getting a t-statistic this large in absolute value if the null hypothesis were true.

# e) confidence interval: estimate plus/minus 1.96 standard errors
#    (1.96 is the same critical value used for the 5% test above)
ciL <- ybar - 1.96 * se
ciU <- ybar + 1.96 * se
# show the interval as "[lower, upper]" with bounds rounded to 3 decimals
sprintf("[%s, %s]", round(ciL, 3), round(ciU, 3))

[1] "[31.251, 40.905]"

# f) summary statistics of the Airq variables, split by the coas indicator,
#    with the difference in means between the two groups
library(modelsummary)
datasummary_balance(formula = ~ coas, data = Airq)

	no (N=9)		yes (N=21)
	Mean	Std. Dev.	Mean	Std. Dev.	Diff. in Means	Std. Error
airq	125.3	10.5	95.9	28.7	-29.5	7.2
vala	4118.2	5909.8	4218.6	4136.7	100.4	2166.9
rain	32.3	7.6	37.7	15.2	5.4	4.2
dens	1706.4	3014.6	1738.1	2821.2	31.7	1178.5
medi	6290.3	10065.4	10842.2	13396.8	4551.9	4450.1