# Here is R code to obtain results similar to those given in # Table 6.1 on p. 50 of E&T. (The results aren't expected to # be exactly the same since E&T didn't use the same random # number generator and seed.) # I'll set the random number seed. set.seed(321) # First I need to get the data in. For a Ch. 7 example I will # show you how to read data in from a file, but now I'll just # type it in here. LSAT <- c(576,635,558,578,666,580,555,661,651,605,653,575,545,572,594) GPA <- c(3.39,3.30,2.81,3.03,3.44,3.07,3.00,3.43,3.36,3.13,3.12,2.74,2.76,2.88,2.96) ID <- c(1:length(GPA)) lsdata <- cbind(ID,LSAT,GPA) # Now let's look at it. lsdata # Now I will create B bootstrap samples of the IDs. To do # this I'll use a bootstrap function from an earlier example. # Here's the function. cdsbootstrap <- function(x, B) { bssamples <- matrix( sample( x, size=B*length(x), replace=T ), nrow=B ) return(bssamples) } # Now I'll use this function, setting B to be 25, and putting # the first column of lsdata (the IDs) as x. lsbss <- cdsbootstrap( lsdata[,1], 25 ) # Each row of the matrix lsbss now contains 15 ID numbers # corresponding to a single bootstrap sample. For each # bootstrap sample we want the correlation of the LSAT and # GPA values. For the first bootstrap sample the LSAT values # are obtained below. templsat <- lsdata[lsbss[1,],2] # Let's check to see that this is doing what I want it to do. # We can check by looking at the 15 entries of the first row # of lsbss, and then looking at the 15 entries of templsat # and checking that they are the correct LSAT values. lsbss[1,] templsat # It's doing what I want. I will now take the 15 LSAT values # for the first bootstrap sample and the 15 GPAs for the first # bootstrap sample and obtain their correlation. tempgpa <- lsdata[lsbss[1,],3] cor( templsat, tempgpa ) # Now that I've figured out how to get a sample correlation from # a bootstrap sample, I'll write a function that will get the # sample correlations from B bootstrap samples and obtain the # sample standard deviation of the B correlation values (which is # the estimated standard error). sesampcor <- function( origdata, bss, B){ sampcor <- numeric() for (b in 1:B) sampcor[b] <- cor( origdata[bss[b,],2], origdata[bss[b,],3] ) seestimate <- sd(sampcor) return(seestimate) } # Now I will put the 8 values of B of interest in a vector # and use a for loop to create the bootstrap samples and # obtain the estimated standard error for each value of B. B <- c(25,50,100,200,400,800,1600,3200) estimatedse <- numeric() for (i in 1:8) { lsbss <- cdsbootstrap( lsdata[,1], B[i] ) estimatedse[i] <- sesampcor( lsdata, lsbss, B[i] ) } # Now let's look at the results. results <- cbind(B, estimatedse) results