Saturday, January 3, 2015

Granville regression?



granville.R

davej — Jan 3, 2015, 7:38 PM

#Granville regression
library(huge)
Loading required package: Matrix
Loading required package: lattice
Loading required package: igraph
Loading required package: MASS
library(MASS)
library(ggplot2)

create.test.data.random= function(){
  #normally distributed data with non-trivial correlation structure
  dimension = 50
  num.observations = 10000
  prob.correlated = 0.2
  L = huge.generator(n=num.observations,d=dimension,graph="random",prob=prob.correlated,v=5.0)
  plot(L)
  par(mfrow=c(1,1))
  df=data.frame(L$data)
  #normalize
  df=scale(df)
  df=data.frame(df)
  return(df)
}

granville.coeffs.raw = function(df){
  cov.matrix=cov(df)
  coeffs=c()
  n=ncol(df)
  for (i in 1:n) {
    coeffs[i]=cov.matrix[1,i]/cov.matrix[i,i]
  }
  coeffs=coeffs[2:n]
  return(coeffs)  
}

lm.coeffs=function(df){
  n=ncol(df)
  linear.model=lm(X1~.,data=df)
  coeffs=linear.model$coefficients
  #ignore the intercept
  coeffs=coeffs[2:n]
  return(coeffs)  
}

ridge.coeffs=function(df,lambda=0.1){
  n=ncol(df)
  ridge.model=lm.ridge(X1~.,data=df,lambda=lambda)
  coeffs=ridge.model$coef
  #ignore the intercept
  coeffs=coeffs[2:n-1]
  return(coeffs)  
}

test.granville = function(){
  df=create.test.data.random()
  truth=lm.coeffs(df)
  granville.raw = granville.coeffs.raw(df)
  ridge.coeff=ridge.coeffs(df)
  par(mfrow=c(1,2))
  plot(truth,granville.raw,xlab='True Coefficients')
  title('Granville')
  plot(truth,ridge.coeff,xlab='True Coefficients')
  title('Ridge')
}