# R code to obtain a bootstrap-t interval (w/o the use of any transformation)
# for the correlation of LSAT and GPA in the population of law schools.
# (The law school data is on p. 19 of E&T.)
library(bootstrap)

set.seed(321)

# Law school data: 15 schools, average LSAT score and average GPA.
LSAT <- c(576, 635, 558, 578, 666, 580, 555, 661, 651, 605,
          653, 575, 545, 572, 594)
GPA <- c(3.39, 3.30, 2.81, 3.03, 3.44, 3.07, 3.00, 3.43, 3.36, 3.13,
         3.12, 2.74, 2.76, 2.88, 2.96)
ID <- seq_along(GPA)
lsdata <- cbind(ID, LSAT, GPA)

# Observed (plug-in) sample correlation.
corr.hat <- cor(LSAT, GPA)

# Bootstrap sizes: B = 999 outer replications; each studentized statistic's
# standard error is estimated from a nested bootstrap of size 25.
n.boott <- 999
n.bootsd <- 25
n.obs <- 15

# Preallocate result vectors (avoids growing objects inside the loops;
# does not change the random number stream, so results are unchanged).
z.rep <- numeric(n.boott)
corr.hat.rep.2 <- numeric(n.bootsd)
corr.hat.rep.3 <- numeric(n.bootsd)

# Outer loop: for each bootstrap sample b, compute the studentized value
# z_b = (theta*_b - theta.hat) / se*_b, where se*_b comes from a nested
# (second-level) bootstrap drawn from the b-th bootstrap sample.
for (b in seq_len(n.boott)) {
  bs.sample <- sample(ID, size = n.obs, replace = TRUE)
  corr.hat.rep.1 <- cor(LSAT[bs.sample], GPA[bs.sample])
  # Nested bootstrap: resample the bootstrap sample to estimate its SE.
  bss <- matrix(sample(bs.sample, size = n.obs * n.bootsd, replace = TRUE),
                nrow = n.bootsd)
  for (bb in seq_len(n.bootsd)) {
    corr.hat.rep.2[bb] <- cor(LSAT[bss[bb, ]], GPA[bss[bb, ]])
  }
  se.est.1 <- sd(corr.hat.rep.2)
  z.rep[b] <- (corr.hat.rep.1 - corr.hat) / se.est.1
}

sorted <- sort(z.rep)

# Estimate the standard error of corr.hat itself, again with B = 25.
bss <- matrix(sample(ID, size = n.obs * n.bootsd, replace = TRUE),
              nrow = n.bootsd)
for (bbb in seq_len(n.bootsd)) {
  corr.hat.rep.3[bbb] <- cor(LSAT[bss[bbb, ]], GPA[bss[bbb, ]])
}
se.est <- sd(corr.hat.rep.3)
# Note: Rather than use the 999 sample correlations computed in the first loop, I
# obtained 25 new sample correlations here to make all of the z's have estimated
# standard errors in the denominator based on bootstrap estimates for which B is 25.

# upper confidence bound:
corr.hat - sorted[50] * se.est
# lower confidence bound:
corr.hat - sorted[950] * se.est
# These confidence bounds are similar to the ones given on the last line of p. 162
# of E&T. Upon running the part of the code that begins with "for (b in 1:999) {"
# and goes to the end three additional times, before executing any of the code below,
# on the last run the resulting confidence interval was (-0.025, 0.94), which matches
# p. 162 much more closely.

# Now I'll use the boott function.
set.seed(321)

# Statistic for boott(): x holds resampled row indices into xdata;
# columns 2 and 3 of lsdata are LSAT and GPA.
theta <- function(x, xdata, ...) {
  cor(xdata[x, 2], xdata[x, 3])
}

ci.results <- boott(lsdata[, 1], theta, lsdata,
                    nbootsd = 25, nboott = 999, perc = c(0.05, 0.95))
ci.results$confpoints
# A little different than what was obtained previously (above). Perhaps the boott function
# uses the random number stream differently.
#
# I'll investigate the use of the Fisher transformation given by (12.24) on p. 163 of E&T.
# First I'll create a function for the inverse function.

# Inverse Fisher transformation (this is tanh(x), written out explicitly).
inv.fish <- function(x) {
  (exp(2 * x) - 1) / (exp(2 * x) + 1)
}

# Now I'll obtain the approximate c.i. based on the assumption of bivariate normality.
# (This uses (12.24) and (12.25) from E&T.) I'll start by creating a function for the
# Fisher transformation.

# Fisher z-transformation of the sample correlation of rows x of xdata.
fish.tran <- function(x, xdata, ...) {
  corre <- cor(xdata[x, 2], xdata[x, 3])
  log((1 + corre) / (1 - corre)) / 2
}

# Normal-theory bounds: z +/- 1.6449 / sqrt(n - 3), with n = 15.
# upper conf. bound:
inv.fish(fish.tran(1:15, lsdata) - 1.6449 / sqrt(12))
# lower conf. bound:
inv.fish(fish.tran(1:15, lsdata) + 1.6449 / sqrt(12))

# Now I'll compute a 90% bootstrap-t c.i. for the Fisher-transformed parameter
# and apply the inverse transformation to the endpoints (as described on p. 163
# of E&T). This avoids assuming approximate bivariate normality.
ci.results <- boott(lsdata[, 1], theta = fish.tran, lsdata,
                    nbootsd = 25, nboott = 999, perc = c(0.05, 0.95))
ci.results$confpoints
inv.fish(ci.results$confpoints)

# Now I'll try the automatic variance-stabilization bootstrap-t (using the default values
# for the arguments).
ci.results <- boott(lsdata[, 1], theta, lsdata, VS = TRUE, perc = c(0.05, 0.95))
ci.results$confpoints
# This is fairly close to the 90% c.i., (0.33, 0.92), given on the last line of p. 164 of E&T.

# Finally, I'll adjust the argument values away from the default settings, making them larger
# in an attempt to improve accuracy.
ci.results <- boott(lsdata[, 1], theta, lsdata, VS = TRUE,
                    v.nbootg = 160, v.nbootsd = 200, v.nboott = 999,
                    perc = c(0.05, 0.95))
ci.results$confpoints