# Hypothesis testing example from Sec. 16.5 of E&T. set.seed(321) library(bootstrap) attach(stamp) # stamp is a data set from the bootstrap package --- # it has one variable, Thickness. hist( Thickness, prob=T ) lines( density( Thickness, kernel="gaussian", bw=0.003 ) ) # I'll check that a window size of 0.0068 produces a # density estimate having just one mode. den.est <- density( Thickness, kernel="gaussian", bw=0.0068 ) # I'll create a vector of 511 differences of the form: # density estimate at ith point - density estimate at (i-1)th point. diff <- den.est$y[-1] - den.est$y[-512] # I'll determine the number of points (at which den.est is computed) # for which one point has a positive diff value and the next point # has a negative diff value. mode.check <- sign( diff[-511] ) - sign( diff[-1] ) num.modes <- length( mode.check[ mode.check == 2 ] ) # Here is the number of modes: num.modes # Now I'll check to see if a window size of 0.0067 yields more than # one mode. den.est <- density( Thickness, kernel="gaussian", bw=0.0067 ) diff <- den.est$y[-1] - den.est$y[-512] mode.check <- sign( diff[-511] ) - sign( diff[-1] ) num.modes <- length( mode.check[ mode.check == 2 ] ) num.modes # Now I'll create 5000 bootstrap samples, using (16.22). bss.y.star <- matrix( sample( Thickness, 485*5000, replace=T ), nrow=5000 ) norm.term <- matrix( rnorm( 485*5000 ), nrow=5000 ) y.star.mean <- apply( bss.y.star, 1, mean ) sr.factor <- sqrt( 1 + ( 0.0068^2 )/( var( Thickness )*484/485 ) ) bss.x.star <- y.star.mean + ( bss.y.star - y.star.mean + 0.0068*norm.term )/sr.factor # I'll check to see what the average value of the sample variances are for these # bootstrap samples and compare it to the sample variance obtained from the original # sample of thickness values. # Here is the mean of the variances of the bootstrap samples: mean( apply( bss.x.star, 1, var ) ) # Here is the sample variance of the original data: var( Thickness ) # For comparison, here is the mean of the sample variances from bss.y.star (the # samples before the rescaling was done). mean( apply( bss.y.star, 1, var ) ) # *** Something seems odd here. The very slight change due to rescaling was not in # the correct direction. Please let me know if you spot anything incorrect in how # I did the rescaling according to (16.22) on p. 231 of E&T. (I checked things # rather carefully.) # The function defined below will be applied to each bootstrap sample to determine # the number of modes that occur when a Gaussian kernel density estimate is created # from the sample using bw=0.0068. modecount <- function( x ) { den <- density( x, kernel="gaussian", bw=0.0068 ) dendiff <- den$y[-1] - den$y[-512] modecheck <- sign( dendiff[-511] ) - sign( dendiff[-1] ) nummodes <- length( modecheck[ modecheck == 2 ] ) nummodes } # Now I'll apply this function and then determine the approximate p-value. modes <- apply( bss.x.star, 1, modecount ) p.value <- ( 5000 - length( modes[ modes == 1 ] ) )/5000 # Here is the approximate p-value: p.value