# This R code pertains to the bootstrapping results of Ch. 10 of E&T. set.seed(321) # First I'll read in the data and compute the y and z values. patch <- read.table("http://mason.gmu.edu/~csutton/EandTT101.txt", header=TRUE) attach(patch) patch z <- oldpatch - placebo y <- newpatch - oldpatch theta.hat <- mean(y)/mean(z) theta.hat # This is the estimate of theta. ( (10.11) on p. 128 ) # Now I'll create 400 bootstrap samples of iIDs. resampled.ids <- sample( id, size=400*8, replace=T ) bssid <- matrix( resampled.ids, nrow=400 ) # I'll store the 400 replicates of theta hat in th.rep. th.rep <- numeric() for ( i in 1:400 ) th.rep[i] <- mean( y[bssid[i,]] )/mean( z[bssid[i,]] ) bs.bias.est <- mean( th.rep ) - theta.hat bs.bias.est # This is the bootstrap estimate of bias. ( similar to (10.13) on p. 128 ) # Note: It shouldn't be too surprising that this estimate differs appreciably # from the one given by (10.13) of E&T, since in Sec. 10.5 of E&T it was # concluded that the bootstrap estimator of bias has relatively large variability. bs.med.bias.est <- median( th.rep ) - theta.hat bs.med.bias.est # This is the bootstrap estimate of median bias. (See (10.39) on p. 137.) bs.se.est <- sd( th.rep ) bs.se.est # This is the bootstrap estimate of s.e.. ( similar to result near middle of p. 128 ) hist( th.rep ) # The histogram is similar to the one in Fig. 10.1 on p. 129. # To compute the better bootstrap estimate of bias, the average of the resampling vectors # is needed. ave.resamp.vec <- numeric() for (i in 1:8 ) { digit.of.interest <- resampled.ids digit.of.interest[ digit.of.interest != i ] <- 0 ave.resamp.vec[i] <- mean( digit.of.interest )/i } ave.resamp.vec mean(ave.resamp.vec) bet.bs.bias.est <- mean( th.rep ) - sum( ave.resamp.vec*y )/sum( ave.resamp.vec*z ) bet.bs.bias.est # This is the better (aka improved) bootstrap estimate of bias. # ( similar to (10.27) on p. 132 ) # # To create a plot like Fig. 10.2 on p. 133 of E&T, I'll use a loop and for each # pass through the loop computations like some of those done by the code above will # be repeated for a different value of B. B <- c(25,50,100,200,400,800,1600,3200) resampled.ids <- sample( id, size=100000*8, replace=T ) bssy <- matrix( y[resampled.ids], nrow=100000, byrow=T ) bssz <- matrix( z[resampled.ids], nrow=100000, byrow=T ) ybar.rep <- apply(bssy, 1, mean) zbar.rep <- apply(bssz, 1, mean) th.rep <- ybar.rep/zbar.rep bias.est <- numeric() digit <- numeric() bet.bias.est <- numeric() for (b in 1:8) { bias.est[b] <- mean( th.rep[1:B[b]] ) - theta.hat for (i in 1:8 ) { num <- 8*B[b] digit <- resampled.ids[1:num] digit[ digit != i ] <- 0 ave.resamp.vec[i] <- mean( digit )/i } bet.bias.est[b] <- mean( th.rep[1:B[b]] ) - sum( ave.resamp.vec*y )/sum( ave.resamp.vec*z ) } plot( B, bias.est, ylim=c(-0.01,0.04), ylab="estimated bias" ) lines( B, bias.est, col = 2 ) lines( B, bet.bias.est, col = 4 ) legend( 2000, 0.03, legend=c("simple", "improved" ,"B=100,000"), fill=c(2,4,8)) # I'll add a line for the B = 100,000 estimate. bias.est.infinity <- mean( th.rep ) - theta.hat abline( h=bias.est.infinity, col=8 ) bias.est.infinity # Here is an estimate of the limiting value of the bootstrap estimate of bias # based on the simple estimator and 100,000 bootstrap samples. It matches the # corresponding estimate given in E&T (see p. 132 and Fig. 10.2) when rounded # to two significant digits. (Note: It may be that the improved estimate based # on 3200 bootstrap samples is just as good, or maybe even better.)