# # Industry Factor Model # # This R code is based on the industry factor model example in Chapter 15 of "Modeling Financial # Time Series with S-Plus,Second Edition" by Zivot and Wang, Springer, 2006. Some of the code was # also borrowed from Guy Yollen's example code for his Financial Data Modeling and Analysis in R, # AMATH 542, University of Washington. # # This code uses the berndtInvest data set, which is part of the fEcofin, written by Rmetrics. # At the time this code was written, the fEcofin library had disappeared from CRAN. However, I had # loaded a copy before it went away. To allow this code to run without the fEcofin library, the # berndtInvest data is loaded from a .csv file. # # Return the indexes in charArrayA where elements of charArrayB appear, or 0 which.ix = function(charArrayA, charArrayB) { ix = c() for (word in charArrayB) { t = which(charArrayA == word) if (length(t) > 0) { ix = c(ix, t) } } if (length(ix) == 0) { ix = c(0) } return(ix) } # # This function is passed a matrix of returns and an optional # covariance matrix. The result is the weights, mean return and # standard deviation of the global minimum variance portfolio, # where the only constraint is full investment # (e.g., shorting is allowed). # analyticGMV = function(returns, S = var(returns)) { Sinv = solve(S) one = rep(1, ncol(S)) denom = as.numeric(t(one) %*% Sinv %*% one) w = (Sinv %*% one) / denom sd = 1 / sqrt(denom) rslt = list(w = round(t(w), 8), mu = mean(returns %*% w), sd = sd) return(rslt) } # # Calculate the weights, mean return and standard deviation of the # tangency portfolio. # analyticTangency = function(returns, rf, S = var(returns)) { mu = apply(returns, 2, mean) Sinv = solve(S) one = rep(1, ncol(S)) mu_e = mu - rf w = (Sinv %*% mu_e) / as.numeric((t(one) %*% Sinv %*% mu_e)) sd = sqrt(t(w) %*% S %*% w) rslt = list(w = round(t(w), 6), mu = sum(w * mu), sd = sd) return(rslt) } # # Given two Markowitz mean/variance optimized portfolio weights, calculate a set of # means and variance values for the efficient frontier using the "two fund theorm". # # returns: a matrix of portfolio returns # gmv_wts: the weights for the Global Minimum Variance portfolio # tan_wts: the weights for the tangency portfolio # S : The covariance matrix # frontierPoints = function(returns, gmv_wts, tan_wts, S, alpha = seq(from=-0.1, to=1.1, by=0.05)) { y_mu = list() x_sd = list() gmv_wts_t = as.vector(gmv_wts) tan_wts_t = as.vector(tan_wts) ix = 1 mu = apply(returns, 2, mean) for (i in alpha) { w = i * gmv_wts_t + ((1-i) * tan_wts_t) y_mu[[ix]] = mu %*% w x_sd[[ix]] = sqrt( t(w) %*% S %*% w) ix = ix + 1 } rslt = list(mu=y_mu, sd=x_sd) return(rslt) } # This is a hack to get around a problem in the fPortfolio code. # This code uses the covariance function, cov(), which is # depricated and now returns a "cov" object. cov = function( time_series.mat ) { return(var(time_series.mat)) } library(zoo) library(fPortfolio) berndt = read.csv(file="berndtInvest.csv") date.col = berndt[[1]] dates = as.Date(date.col) skipCols = c(1, which.ix(colnames(berndt), c("MARKET", "RKFREE"))) rf.ts = as.numeric(berndt[[ "RKFREE"]]) rf = mean(rf.ts) returns = as.matrix(berndt[-skipCols]) # remove the date, market data and risk free rate columns names = colnames(returns) tech = c("DATGEN", "DEC", "IBM", "TANDY") techIx = which.ix(names, tech) oil = c("CONTIL", "DELTA", "MOBIL", "PANAM", "TEXACO") oilIx = which.ix(names, oil) n.stocks = ncol(returns) tech.dum = oil.dum = other.dum = matrix(0,n.stocks,1) tech.dum[techIx,] = 1 oil.dum[oilIx,] = 1 other.dum = 1 - tech.dum - oil.dum B = cbind(tech.dum,oil.dum,other.dum) dimnames(B) = list(colnames(returns),c("TECH","OIL","OTHER")) F.hat = solve(t(B) %*% B) %*% t(B) %*% t(returns) E.hat = t(returns) - B %*% F.hat # Calculate the variances across the rows diagD.hat = apply(E.hat, 1, var) Dinv.hat = diag( diagD.hat^(-1)) # multivariate FGLS regression to estimate K x T matrix of factor returns H = solve(t(B) %*% Dinv.hat %*% B) %*% t(B) %*% Dinv.hat # create factor mimicking portfolios F.hat = H %*% t(returns) colnames(H) = colnames(returns) F.hat = t(F.hat) cov.ind = B %*% var(F.hat) %*% t(B) + diag(diagD.hat) gmv.ind = analyticGMV(returns, cov.ind) tan.ind = analyticTangency( returns, rf, cov.ind) gmv.ind.w = gmv.ind$w tan.ind.w = tan.ind$w gmv.ret = analyticGMV(returns) tan.ret = analyticTangency(returns, rf) gmv.ret.w = gmv.ret$w tan.ret.w = tan.ret$w gmv.x = c(gmv.ind$sd, gmv.ret$sd) gmv.y = c(gmv.ind$mu, gmv.ret$mu) tan.x = c(tan.ind$sd, tan.ret$sd) tan.y = c(tan.ind$mu, tan.ret$mu) frontier.ind = frontierPoints(returns, gmv.ind.w, tan.ind.w, cov.ind ) alpha = seq(from=-2, to=2, by=0.05) frontier.ret = frontierPoints(returns, gmv.ret.w, tan.ret.w, var(returns), alpha) ind.sd = as.numeric(frontier.ind$sd) ind.mu = as.numeric(frontier.ind$mu) ret.sd = as.numeric(frontier.ret$sd) ret.mu = as.numeric(frontier.ret$mu) # Use fPortfolio's portfolioFrontier function to calculate the long-long portfolio frontier spec = portfolioSpec() setNFrontierPoints(spec)=100 returns.ts = as.timeSeries( returns ) longOnly = portfolioFrontier(data=returns.ts, spec=spec) longOnly.port = longOnly@portfolio longOnly.mu = longOnly.port@portfolio$targetReturn[,"mu"] longOnly.sd = longOnly.port@portfolio$targetRisk[,"Sigma"] xmin = min(ind.sd, ret.sd) xmax = max(ind.sd, ret.sd) ymin = min(ind.mu, ret.mu) ymax = max(ind.mu, ret.mu) ix = which(longOnly.mu >= ymin) longOnly.mu.f = longOnly.mu[ix] longOnly.sd.f = longOnly.sd[ix] sharpe = longOnly.mu.f / longOnly.sd.f longOnly.s = which.max(sharpe) par(mfrow=c(1,1)) plot(x=ret.sd, y=ret.mu, xlim=c(xmin, xmax), ylim=c(ymin, ymax), type="l", col="black", ylab="Monthly Portfolio Return", xlab=expression(paste("monthly ", sigma)), lwd=2 ) lines(x = ind.sd, y=ind.mu, type="l", col="red", lwd=2) lines(x = longOnly.sd.f, y = longOnly.mu.f, col="blue", lwd=2) points(x = gmv.x, y = gmv.y, pch=15, col="magenta") points(x = tan.x, y = tan.y, pch=15, col="green") points(y = longOnly.mu.f[longOnly.s], x = longOnly.sd.f[longOnly.s], pch=15, col="red") grid(col="grey") legend(x="topleft", legend=c("MV portfolio", "BARRA Industry Factor", "Long Only"), col=c("black", "red", "blue"), lwd=4) par(mfrow=c(1,1))