LASSO Regression - L1 Regularization
<- function(X, Y, rm_na=T, standardise=F, lambda, tol=1e-6, max_iter=1e+5) {
lasso
if (!is.matrix(X)) {
<- as.matrix(X)
X
}
if (rm_na) {
<- complete.cases(X) & complete.cases(Y)
index <- X[index,]
X <- Y[index]
Y
}
if (standardise) {
<- apply(X, 2, function(x) (x - mean(x))/sd(x))
X
}
# if (intercept) {
# X <- cbind(rep(1,nrow(X)), X)
# colnames(X)[1] <- "(Intercept)"
# }
# optim's methods won't produce variable selection - see https://stats.stackexchange.com/questions/121209/how-can-i-implement-lasso-in-r-using-optim-function
# so estimate via coordinate descent
<- ncol(X)
K <- numeric(ncol(X))
b # b <- solve(crossprod(X) + lambda*diag(K)) %*% crossprod(X, Y)
names(b) <- colnames(X)
<- function(b, l) {
soft_thresh
ifelse(l < abs(b), sign(b)*(abs(b) - l), 0)
}
<- 1
current for (iter in 1:max_iter) {
<- b
b_old
for (k in 1:K) {
<- Y - X[,-k] %*% b[-k]
r <- soft_thresh(crossprod(X[,k],r), length(Y)*lambda)/crossprod(X[,k]) #length(y) gives consistent results w/ glmnet
b[k]
}
<- norm(as.matrix(b-b_old), "F")
current if (which.min(c(tol,current))==2) break
if (any(is.na(b)) | any(is.nan(b))) break
}
return(b)
}
Testing the function:
library(glmnet)
## Loaded glmnet 4.1-4
<- as.matrix(mtcars[, -1])
X <- apply(X, 2, function(x) (x-mean(x)) / sd(x))
X_standard <- mtcars[[1]]
Y
data.frame(
glmnet = glmnet(X, Y, alpha=1, lambda=0.5, intercept = F, standardize=F)$beta[,1],
lasso = lasso(X, Y, lambda=0.5)
)
## glmnet lasso
## cyl 0.00000000 0.000000000
## disp -0.01476942 -0.012342881
## hp -0.00578982 -0.007107273
## drat 1.24524219 1.226801727
## wt -0.69362931 -1.034551274
## qsec 0.94504513 0.993371825
## vs 0.00000000 0.000000000
## am 0.00000000 0.000000000
## gear 1.73327735 1.637404538
## carb -0.44294482 -0.340473978
data.frame(
glmnet = glmnet(X_standard, Y, alpha=1, lambda=0.5, intercept = F, standardize=F)$beta[,1],
lasso = lasso(X_standard, Y, lambda=0.5)
)
## glmnet lasso
## cyl -1.51425578 -1.53700527
## disp 0.00000000 0.00000000
## hp -0.98389039 -0.96091464
## drat 0.02978391 0.03332529
## wt -2.63442748 -2.62683457
## qsec 0.00000000 0.00000000
## vs 0.00000000 0.00000000
## am 0.23140313 0.22850260
## gear 0.00000000 0.00000000
## carb -0.15265213 -0.16064929