# This R code pertains to the bootstrapping results of Ch. 10 of E&T.
set.seed(321)

# First I'll read in the data and compute the y and z values.  
patch <- read.table("http://mason.gmu.edu/~csutton/EandTT101.txt", header=TRUE)
attach(patch)
patch
z <- oldpatch - placebo
y <- newpatch - oldpatch
theta.hat <- mean(y)/mean(z)
theta.hat
# This is the estimate of theta.  ( (10.11) on p. 128 )

# Now I'll create 400 bootstrap samples of iIDs.
resampled.ids <- sample( id, size=400*8, replace=T )
bssid <- matrix( resampled.ids, nrow=400 ) 

# I'll store the 400 replicates of theta hat in th.rep.
th.rep <- numeric()
for ( i in 1:400 ) th.rep[i] <- mean( y[bssid[i,]] )/mean( z[bssid[i,]] )

bs.bias.est <- mean( th.rep ) - theta.hat
bs.bias.est
# This is the bootstrap estimate of bias.  ( similar to (10.13) on p. 128 )
# Note: It shouldn't be too surprising that this estimate differs appreciably
# from the one given by (10.13) of E&T, since in Sec. 10.5 of E&T it was 
# concluded that the bootstrap estimator of bias has relatively large variability.

bs.med.bias.est <- median( th.rep ) - theta.hat
bs.med.bias.est 
# This is the bootstrap estimate of median bias.  (See (10.39) on p. 137.)

bs.se.est <- sd( th.rep ) 
bs.se.est
# This is the bootstrap estimate of s.e..  ( similar to result near middle of p. 128 )

hist( th.rep )
# The histogram is similar to the one in Fig. 10.1 on p. 129.

# To compute the better bootstrap estimate of bias, the average of the resampling vectors 
# is needed.
ave.resamp.vec <- numeric()
for (i in 1:8 ) {
                  digit.of.interest <- resampled.ids
                  digit.of.interest[ digit.of.interest != i ] <- 0
                  ave.resamp.vec[i] <- mean( digit.of.interest )/i
                 } 
ave.resamp.vec
mean(ave.resamp.vec)

bet.bs.bias.est <- mean( th.rep ) - sum( ave.resamp.vec*y )/sum( ave.resamp.vec*z )
bet.bs.bias.est
# This is the better (aka improved) bootstrap estimate of bias.  
# ( similar to (10.27) on p. 132 )

#
# To create a plot like Fig. 10.2 on p. 133 of E&T, I'll use a loop and for each
# pass through the loop computations like some of those done by the code above will
# be repeated for a different value of B.
B <- c(25,50,100,200,400,800,1600,3200)
resampled.ids <- sample( id, size=100000*8, replace=T )
bssy <- matrix( y[resampled.ids], nrow=100000, byrow=T )
bssz <- matrix( z[resampled.ids], nrow=100000, byrow=T )
ybar.rep <- apply(bssy, 1, mean)
zbar.rep <- apply(bssz, 1, mean)
th.rep <- ybar.rep/zbar.rep

bias.est <- numeric()
digit <- numeric()
bet.bias.est <- numeric()
for (b in 1:8) {
  bias.est[b] <- mean( th.rep[1:B[b]] ) - theta.hat 
  for (i in 1:8 ) {                    
                     num <- 8*B[b]
                     digit <- resampled.ids[1:num]
                     digit[ digit != i ] <- 0
                     ave.resamp.vec[i] <- mean( digit )/i
                   } 
  bet.bias.est[b] <- mean( th.rep[1:B[b]] ) - sum( ave.resamp.vec*y )/sum( ave.resamp.vec*z )
 }

plot( B, bias.est, ylim=c(-0.01,0.04), ylab="estimated bias" )
lines( B, bias.est, col = 2 )
lines( B, bet.bias.est, col = 4 )
legend( 2000, 0.03, legend=c("simple", "improved" ,"B=100,000"), fill=c(2,4,8))

# I'll add a line for the B = 100,000 estimate.
bias.est.infinity <- mean( th.rep ) - theta.hat
abline( h=bias.est.infinity, col=8 )
bias.est.infinity
# Here is an estimate of the limiting value of the bootstrap estimate of bias
# based on the simple estimator and 100,000 bootstrap samples.  It matches the 
# corresponding estimate given in E&T (see p. 132 and Fig. 10.2) when rounded
# to two significant digits.  (Note: It may be that the improved estimate based 
# on 3200 bootstrap samples is just as good, or maybe even better.)