## R code used in Statistical Analysis of Financial Data ## James E. Gentle # This is the code that produces graphics in Chapter 1. # Most of these plots are of financial data that were obtained using # getSymbols in quantmod. library(quantmod) INTCd20173Q <- getSymbols("INTC", env=NULL, from="2017-1-1", to="2017-9-30", periodicity="daily", warnings=FALSE) # Put the raw closes in an atomic vector x x <- as.numeric(INTCd20173Q[,4]) # Get the day of the year corresponding to each observation. td <- as.numeric(strftime(rownames(as.data.frame(INTCd20173Q)), format="%j")) # The index of td matches the index of x # Form days of year corresponding to end of month tm <- as.numeric(strftime( c("2017-01-31","2017-02-28","2017-03-31","2017-04-28", "2017-05-31","2017-06-30","2017-07-31","2017-08-31", "2017-09-29"),format="%j")) # Form days of year corresponding to end of week (from Jan 6) tw <- seq(as.numeric(strftime("2017-01-06",format="%j")), as.numeric(strftime("2017-09-30",format="%j")), 7) # Adjust for Easter Friday EF <- match(as.numeric(strftime("2017-04-14",format="%j")),tw) tw[EF] <- tw[EF]-1 # The indexes of tw and tm do not match the indexes of x, # must use indirect addressing oldparreset <- par(no.readonly=T) par(mfrow=c(1,2)) plot(tw,x[match(tw,td)],typ="p",xlab="Time",ylab="Quantity", main="",xaxt="n") plot(tw,x[match(tw,td)],typ="b",xlab="Time",ylab="Quantity", main="",xaxt="n") par(oldparreset) ########################################### oldparreset <- par(no.readonly=T) par(mfrow=c(2,2)) plot(td, x, xlab="Time", ylab="Closing Price", typ="l", col="black", main="Daily Closes") plot(tw, x[match(tw,td)], xlab="Time",ylab="Closing Price", typ="l", col="blue", main="Weekly Closes") plot(tm, x[match(tm,td)], xlab="Time", ylab="Closing Price", typ="l",col="green", main="Monthly Closes") plot(td, x, xlab="Time",ylab="Closing Price", typ="l",col="black", main="Composite") lines(tw, x[match(tw,td)], col="blue") lines(tm, x[match(tm,td)], col="green") par(oldparreset) ########################################### library(Quandl) EURUSD <- Quandl("CURRFX/EURUSD", collapse="monthly", start_date="1999-01-01", end_date="2017-12-31", type="xts") plot(EURUSD[,1], ylab="USD / Euro", main="US Dollar per Euro") ########################################### simple <- (-100:100)/500 logreturn <- log(1+simple) simple <- 100*simple # make percentages logreturn <- 100*logreturn plot(simple, logreturn, typ="l", xlab="Simple Return (Percent)", ylab="Log Return (Percent)") abline(h=0) abline(v=0) lines(c(5,5), c(0,100*log(1+.05))) lines(c(0,5), c(100*log(1+.05),100*log(1+.05))) arrows(11, 100*log(1+.05), 5, 100*log(1+.05), angle=10) text(15,100*log(1+.05),"4.88") lines(c(-5,-5), c(0,100*log(0.95))) lines(c(-5,0), c(100*log(0.95),100*log(0.95))) arrows(-12, 100*log(0.95), -5, 100*log(0.95), angle=10) text(-15, 100*log(0.95), "-5.13") ########################################### library(quantmod) INTC <- getSymbols("INTC", env=NULL, from="2017-1-1", to="2017-3-31", periodicity="daily", warnings=FALSE) change <- as.numeric(OpCl(INTC)) volume <- as.numeric(INTC[,5]) colors <- ifelse(change>0, "green", "red") barplot(volume/1000000, col=colors, ylab="Million") ########################################### library(quantmod) INTC <- getSymbols("INTC", env=NULL, from="2017-1-1", to="2017-3-31", periodicity="daily", warnings=FALSE) chartSeries(INTC, type = "candlesticks", theme=chartTheme('white',up.col='green',dn.col='red')) ########################################### z <- getSymbols("^DJI", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) DJId <- as.numeric(z[,6]) # Dow Jones Industrials z <- getSymbols("^GSPC", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) GSPCd <- as.numeric(z[,6]) # S&P 500 GSPCdScaled <- GSPCd*DJId[1]/GSPCd[1] z <- getSymbols("^IXIC", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) IXICd <- as.numeric(z[,6]) # Nasdaq Composite IXICdScaled <- IXICd*DJId[1]/IXICd[1] num <- length(DJId) plot(DJId, type="l", ylab="Scaled Daily Closes", yaxt="n", xlab="January 1987 to December 2017",xaxt="n", xlim=c(0,num+1000), ylim=c(min(DJId),max(DJId,GSPCdScaled,IXICdScaled))) h<-5*num/30 # length of 5 years. beginning of 1990 =4*h/5 axis(1,at=seq(4*(h+1)/5,num,h), labels=c("1990","1995","2000","2005","2010","2015")) text(num+600,DJId[num],"DJIA") lines(GSPCdScaled,col="red") text(num+600,GSPCdScaled[num],"S&P500",col="red") lines(IXICdScaled,col="blue") text(num+600,IXICdScaled[num],"Nasdaq",col="blue") ########################################### z <- getSymbols("^DJI", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) DJIdz <- as.numeric(z[,6]) # Dow Jones Industrials z <- getSymbols("^GSPC", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) GSPCdz <- as.numeric(z[,6]) # S&P 500 z <- getSymbols("^IXIC", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) IXICdz <- as.numeric(z[,6]) # Nasdaq Composite GSPCdScaled <- GSPCdz*DJIdz[1]/GSPCdz[1] IXICdScaled <- IXICdz*DJIdz[1]/IXICdz[1] # Plot Nasdaq Composite first, because it is most variable plot(log(IXICdScaled),type="l",ylab="Daily Closes (Log Scale)", xlab="January 1987 to December 2017",xaxt="n",yaxt="n", xlim=c(0,num+1000), ylim=c(min(log(DJIdz)),max(log(DJIdz), log(GSPCdScaled),log(IXICdScaled))), ylog=TRUE,col="blue") h<-5*num/30 # length of 5 years. beginning of 1990 =4*h/5 axis(1,at=seq(4*(h+1)/5,num,h),labels=c("1990","1995","2000", "2005","2010","2015")) lines(log(GSPCdScaled),col="red") lines(log(DJIdz)) legend("bottomright",legend=c("DJIA", "S&P500", "Nasdaq"), col=c("black", "red", "blue"),lty=c(1,1,1)) ########################################### z <- getSymbols("^GSPC", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) GSPCd <- as.numeric(z[,6]) GSPCdReturns <- diff(log(GSPCd)) z <- getSymbols("UPRO", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) UPROd <- as.numeric(z[,4]) UPROdReturns <- diff(log(UPROd)) GSPCdReturnsUPRO <- GSPCdReturns[(length(GSPCdReturns)-length(UPROdReturns)+1): length(GSPCdReturns)] oldparreset <- par(no.readonly=T) par(mfrow=c(2,2)) first <- 12 # July 13, 2009 ## use z[11:19,] to get these last <- first+3 plot(GSPCdReturnsUPRO[first:last], ylab="Log Return", type="l", xlab="Day of Week", xaxt="n", ylim=c(min(GSPCdReturnsUPRO[first:last], UPROdReturns[first:last]), max(GSPCdReturnsUPRO[first:last], UPROdReturns[first:last])), col="red", main="Week of July 13, 2009") axis(1,at=1:4,labels=c("Tue","Wed","Thu","Fri")) lines(UPROdReturns[first:last]) first <- 771 # July 16, 2012 ## use z[750:770,] last <- first+3 plot(GSPCdReturnsUPRO[first:last], ylab="Log Return", type="l", xlab="Day of Week", xaxt="n", ylim=c(min(GSPCdReturnsUPRO[first:last], UPROdReturns[first:last]), max(GSPCdReturnsUPRO[first:last], UPROdReturns[first:last])), col="red", main="Week of July 16, 2012") axis(1,at=1:4,labels=c("Tue","Wed","Thu","Fri")) lines(UPROdReturns[first:last]) first <- 1522 # July 13, 2015 ## use z[1521:1530,] last <- first+3 plot(GSPCdReturnsUPRO[first:last], ylab="Log Return", type="l", xlab="Day of Week", xaxt="n", ylim=c(min(GSPCdReturnsUPRO[first:last], UPROdReturns[first:last]), max(GSPCdReturnsUPRO[first:last], UPROdReturns[first:last])), col="red", main="Week of July 13, 2015") axis(1,at=1:4,labels=c("Tue","Wed","Thu","Fri")) lines(UPROdReturns[first:last]) first <- 2024 # July 10, 2017 ## use z[2000:2020,] last <- first+3 plot(GSPCdReturnsUPRO[first:last], ylab="Log Return", type="l", xlab="Day of Week",xaxt="n", ylim=c(min(GSPCdReturnsUPRO[first:last], UPROdReturns[first:last]), max(GSPCdReturnsUPRO[first:last], UPROdReturns[first:last])), col="red", main="Week of July 10, 2017") axis(1,at=1:4,labels=c("Tue","Wed","Thu","Fri")) lines(UPROdReturns[first:last]) par(oldparreset) ########################################### library(quantmod) GSPCd <- getSymbols("^GSPC", env=NULL, from="2017-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) INTCd <- getSymbols("INTC", env=NULL, from="2017-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) # annualize daily log returns GSPCd_ret <- 252*diff(log(Cl(GSPCd))) names(GSPCd_ret) <- "GSPC.Return" INTCd_ret <- 252*diff(log(Cl(INTCd))) names(INTCd_ret) <- "INTC.Return" DTBsd <- getSymbols("DTB3", env=NULL, src = "FRED") DTBsd <- DTBsd["20170101/20171231"] # DTBsd is quoted in percentage (annual rate) All <- na.omit(merge(GSPCd_ret, INTCd_ret, DTBsd)) # adjust for percentages S_and_P <- as.numeric(All$GSPC.Return-All$DTB3/100) Intel <- as.numeric(All$INTC.Return-All$DTB3/100) plot(S_and_P, Intel, xlab="S & P 500", main="Annualized Excess Log Returns, 2017") cor(S_and_P, Intel) fit <- lm(Intel~S_and_P) summary(fit) abline(fit$coef[1], fit$coef[2]) ########################################### plot(density(INTCd20173QSimpleReturns), main="Density of INTC Daily Simple Returns", xlab="Return",ylab="Density") ########################################### library(quantmod) DJId <- getSymbols("^DJI", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) GSPCd <- getSymbols("^GSPC", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) IXICd <- getSymbols("^IXIC", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) INTCd <- getSymbols("INTC", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) GLDd <- getSymbols("GLD", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) x <- as.numeric(DJId[,4]) # Dow Jones Industrials xr <- diff(log(x)) y <- as.numeric(GSPCd[,4]) # S&P 500 yr <- diff(log(y)) z <- as.numeric(IXICd[,4]) # Nasdaq Composite zr <- diff(log(z)) w <- as.numeric(INTCd[,4]) # Intel wr <- diff(log(w)) u <- as.numeric(GLDd[,4]) # Gold ur <- diff(log(u)) cor(cbind(x,y,z)) cor(cbind(xr,yr,zr,wr,ur)) cor(cbind(xr,yr,zr,wr),method="spearman") cor(cbind(xr,yr,zr,wr),method="kendall") ########################################### x <- DJId["20170101/"] y <- GSPCd["20170101/"] z <- IXICd["20170101/"] w <- INTCd["20170101/"] u <- GLDd["20170101/"] Dow <- diff(log(as.numeric(x[,4]))) S_and_P <- diff(log(as.numeric(y[,4]))) Nasdaq <- diff(log(as.numeric(z[,4]))) Intel <- diff(log(as.numeric(w[,4]))) Gold <- diff(log(as.numeric(u[,4]))) pairs(cbind(Dow, S_and_P, Nasdaq, Intel,Gold), labels=c("Dow","S&P 500","Nasdaq","Intel","Gold"), main="Log Returns, 2017") ########################################### library(ks) z <- IXICd["20170101/"] w <- INTCd["20170101/"] Nasdaq <- diff(log(as.numeric(z[,4]))) Intel <- diff(log(as.numeric(w[,4]))) kdeobj <- kde(cbind(Nasdaq, Intel)) oldparreset <- par(no.readonly=T) par(mfrow=c(2,2)) plot(kdeobj,cont=c(25,50,75,90,95)) plot(kdeobj,drawpoints=TRUE,cont=c(25,50,75,90,95)) plot(kdeobj,display="persp",col="white") plot(kdeobj,display="image",cont=c(25,50,75,90,95)) par(oldparreset) ########################################### hist(INTCd2017SimpleReturns, freq=FALSE, main="Histogram of INTC Daily Simple Returns", xlab="Return",ylab="Relative Frequency") curve(dnorm(x, mean=mean(INTCd2017SimpleReturns), sd=sd(INTCd2017SimpleReturns)), from=min(INTCd2017SimpleReturns), to=max(INTCd2017SimpleReturns), add=TRUE, col="green") ########################################### dens <- density(GSPCdReturns1990) mu <- mean(GSPCdReturns1990) sd <- sd(GSPCdReturns1990) eps <- min(dnorm(min(GSPCdReturns1990),mean=mu, sd=sd), dnorm(max(GSPCdReturns1990),mean=mu, sd=sd)) y <- ifelse(dens$yp)[1] p1 <- sum(dens[1:j]*(brks[2:(j+1)]-brks[1:j])) cut <- brks[j]-(brks[j]-brks[j-1])*(p1-p)/p1 lines(c(cut,cut),c(0,dens[j-1]),col="red") cut x.coord <- c(brks[1],brks[1],brks[2],brks[3],brks[3],cut,cut, brks[1]) y.coord <- c(0,dens[1],dens[2],dens[2],dens[3],dens[3],0,0) polygon(x.coord, y.coord, density=20, col="red") arrows(brks[2], dens[2]+10, brks[3]+h/3, dens[3]/2, angle=10, col="red") text(brks[2], dens[2]+12, "5%", col="red") arrows(brks[11], dens[11]+30, brks[8]+h/2, dens[8]/10, angle=10, col="blue") text(brks[11], dens[11]+32, "95%", col="blue") arrows(cut+0.004, bins$density[j-1]+10, cut, 0, angle=10, col="red") text(cut+0.004,bins$density[j-1]+12,as.character(round(cut,4)), col="red") ########################################### x <- (-50:60)/10 y <- dt(x-1,2) x05 <- qt(0.05,2) x05+1 ## -1.92 plot(x, y, typ="l", yaxt="n", col="blue", xlab="Return (Percent)", ylab="") abline(0,0) lines(c(x05+1,x05+1), c(0,dt(x05,2)), col="red") x.coord <- seq(-50,x05+1,.01) y.coord <- dt(x.coord-1,2) x.coord <- c(x.coord,x05+1,-50,-50) y.coord <- c(y.coord,0,0,x05+1) polygon(x.coord, y.coord, density=20, col="red") arrows(-3, 0.07, -2.5, 0.01, angle=10, col="red") text(-3, 0.074, "5%", col="red") arrows(4, 0.2, 1, 0.04,angle=10, col="blue") text(4.4, 0.2, "95%", col="blue") arrows(0, 0.07, x05+1, 0, angle=10, col="red") text(0.5, 0.07, "-1.92", col="red") ########################################### ########################################### # add code for tables in Ch 1 ### correlations in the tails library(quantmod) DJId <- getSymbols("^DJI", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) GSPCd <- getSymbols("^GSPC", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) IXICd <- getSymbols("^IXIC", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) INTCd <- getSymbols("INTC", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) GLDd <- getSymbols("GLD", env=NULL, from="1987-1-1", to="2017-12-31", periodicity="daily", warnings=FALSE) x <- as.numeric(DJId[,4]) # Dow Jones Industrials xr <- diff(log(x)) y <- as.numeric(GSPCd[,4]) # S&P 500 yr <- diff(log(y)) z <- as.numeric(IXICd[,4]) # Nasdaq Composite zr <- diff(log(z)) w <- as.numeric(INTCd[,4]) # Intel wr <- diff(log(w)) u <- as.numeric(GLDd[,4]) # Gold ur <- diff(log(u)) allr <- cbind(xr,yr,zr,wr) p<- 0.01 qx <- quantile(xr,p) qy <- quantile(yr,p) qz <- quantile(zr,p) qw <- quantile(wr,p) indq <- which(allr[,1] < qx & allr[,2] < qy & allr[,3] < qw & allr[,4] < qu) indq1 <- which(allr[,1] < qx & allr[,2] < qy ) allq1 <- allr[indq1,] cor(allq1) ### this is surprising