This vignette is divided into three parts: (1) we explain how to use EFCP and VFCP, compared with cross-validation (CV) and other parametric methods, for ridge regression; (2) we generate the plots shown in our paper; (3) we compare the tuning parameters chosen by EFCP, VFCP, CV with two splits, and CV with three splits.
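Before the full simulation, here is a minimal sketch of the call pattern used throughout this vignette. It assumes that efcp_ridge() and vfcp_ridge() are available (e.g. from the ConformalSmallest package); both take training data (X, Y), test covariates X0, a grid of ridge penalties, and a miscoverage level alpha, and return the interval endpoints and the selected penalty, as used in the code below.
library(mvtnorm)
library(ConformalSmallest)  # assumed to provide efcp_ridge() and vfcp_ridge()
set.seed(1)
d <- 5; n <- 200; n0 <- 100; alpha <- 0.1
Sigma <- matrix(0.5, d, d); diag(Sigma) <- 1
X  <- rmvt(n,  Sigma, df = 3)   # training covariates
X0 <- rmvt(n0, Sigma, df = 3)   # test covariates
beta <- rep(1:5, d/5)
Y  <- X %*% beta + rt(n, 3)     # heavy-tailed linear response
lambda_seq <- seq(0, 200, length.out = 100)
fit <- efcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
head(fit$lo)   # lower endpoints of the 90% prediction intervals for X0
head(fit$up)   # upper endpoints
fit$lambda     # ridge penalty selected by EFCP
The full simulation below runs this comparison over a grid of dimensions and over repeated replications; vfcp_ridge() has the same interface.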
library(glmnet)
library(MASS)
library(mvtnorm)
#source("ridge_funs.R")
name=paste("linear_fm_t3",sep="")
df <- 3 #degrees of freedom
l <- 60 #number of dimensions
l.lambda <- 100
lambda_seq <- seq(0,200,l=l.lambda)
dim <- round(seq(5,300,l=l))
alpha <- 0.1
n <- 200 #number of training samples
n0 <- 100 #number of prediction points
nrep <- 100 #number of independent trials
rho <- 0.5
cov.efcp_linear_fm_t3 <- len.efcp_linear_fm_t3 <- matrix(0,nrep,l)
cov.vfcp_linear_fm_t3 <- len.vfcp_linear_fm_t3 <- matrix(0,nrep,l)
cov.naive_linear_fm_t3 <- len.naive_linear_fm_t3 <- matrix(0,nrep,l)
cov.param_linear_fm_t3 <- len.param_linear_fm_t3 <- matrix(0,nrep,l)
cov.star_linear_fm_t3 <- len.star_linear_fm_t3 <- matrix(0,nrep,l)
cov.cv10_linear_fm_t3 <- len.cv10_linear_fm_t3 <- matrix(0,nrep,l)
cov.cv5_linear_fm_t3 <- len.cv5_linear_fm_t3 <- matrix(0,nrep,l)
cov.cvloo_linear_fm_t3 <- len.cvloo_linear_fm_t3 <- matrix(0,nrep,l)
out.efcp.up <- out.efcp.lo <- matrix(0,n0,l)
out.vfcp.up <- out.vfcp.lo <- matrix(0,n0,l)
out.naive.up <- out.naive.lo <- matrix(0,n0,l)
out.param.up <- out.param.lo <- matrix(0,n0,l)
out.star.up <- out.star.lo <- matrix(0,n0,l)
out.cv10.up <- out.cv10.lo <- matrix(0,n0,l)
out.cv5.up <- out.cv5.lo <- matrix(0,n0,l)
out.cvloo.up <- out.cvloo.lo <- matrix(0,n0,l)
for(i in 1:nrep){
  if(i %% 10 == 0){
    print(i)  # progress indicator
  }
  for (r in 1:l){
    d <- dim[r]
    set.seed(i)
    # heavy-tailed covariates, linear signal, heteroscedastic t errors
    Sigma <- matrix(rho, d, d)
    diag(Sigma) <- rep(1, d)
    X <- rmvt(n, Sigma, df)  # multivariate t distribution
    beta <- rep(1:5, d/5)
    eps <- rt(n, df)*(1 + sqrt(X[,1]^2 + X[,2]^2))
    Y <- X %*% beta + eps
    X0 <- rmvt(n0, Sigma, df)
    eps0 <- rt(n0, df)*(1 + sqrt(X0[,1]^2 + X0[,2]^2))
    Y0 <- X0 %*% beta + eps0
    # parametric linear-model intervals ("Linear" in the plots)
    out.param <- ginverse.fun(X, Y, X0, alpha = alpha)
    out.param.lo[,r] <- out.param$lo
    out.param.up[,r] <- out.param$up
    cov.param_linear_fm_t3[i,r] <- mean(out.param.lo[,r] <= Y0 & Y0 <= out.param.up[,r])
    len.param_linear_fm_t3[i,r] <- mean(out.param.up[,r] - out.param.lo[,r])
    # EFCP
    out.efcp <- efcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    out.efcp.up[,r] <- out.efcp$up
    out.efcp.lo[,r] <- out.efcp$lo
    cov.efcp_linear_fm_t3[i,r] <- mean(out.efcp.lo[,r] <= Y0 & Y0 <= out.efcp.up[,r])
    len.efcp_linear_fm_t3[i,r] <- mean(out.efcp.up[,r] - out.efcp.lo[,r])
    # VFCP
    out.vfcp <- vfcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    out.vfcp.up[,r] <- out.vfcp$up
    out.vfcp.lo[,r] <- out.vfcp$lo
    cov.vfcp_linear_fm_t3[i,r] <- mean(out.vfcp.lo[,r] <= Y0 & Y0 <= out.vfcp.up[,r])
    len.vfcp_linear_fm_t3[i,r] <- mean(out.vfcp.up[,r] - out.vfcp.lo[,r])
    # naive intervals
    out.naive <- naive.fun(X, Y, X0, alpha = alpha)
    out.naive.up[,r] <- out.naive$up
    out.naive.lo[,r] <- out.naive$lo
    cov.naive_linear_fm_t3[i,r] <- mean(out.naive.lo[,r] <= Y0 & Y0 <= out.naive.up[,r])
    len.naive_linear_fm_t3[i,r] <- mean(out.naive.up[,r] - out.naive.lo[,r])
    # CV* (three splits)
    out.star <- star.fun(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    out.star.up[,r] <- out.star$up
    out.star.lo[,r] <- out.star$lo
    cov.star_linear_fm_t3[i,r] <- mean(out.star.lo[,r] <= Y0 & Y0 <= out.star.up[,r])
    len.star_linear_fm_t3[i,r] <- mean(out.star.up[,r] - out.star.lo[,r])
    # 5-fold CV
    out.cv5 <- cv.fun(X, Y, X0, lambda = lambda_seq, alpha = alpha, nfolds = 5)
    out.cv5.up[,r] <- out.cv5$up
    out.cv5.lo[,r] <- out.cv5$lo
    cov.cv5_linear_fm_t3[i,r] <- mean(out.cv5.lo[,r] <= Y0 & Y0 <= out.cv5.up[,r])
    len.cv5_linear_fm_t3[i,r] <- mean(out.cv5.up[,r] - out.cv5.lo[,r])
  }
}
ridge_linear_len100_t3=list(len.param_linear_fm_t3, len.naive_linear_fm_t3, len.vfcp_linear_fm_t3, len.star_linear_fm_t3, len.cv5_linear_fm_t3, len.efcp_linear_fm_t3)
save(ridge_linear_len100_t3, file = "ridge_linear_len100_t3.rda" )
ridge_linear_cov100_t3=list(dim, cov.param_linear_fm_t3, cov.naive_linear_fm_t3, cov.vfcp_linear_fm_t3, cov.star_linear_fm_t3, cov.cv5_linear_fm_t3, cov.efcp_linear_fm_t3)
save(ridge_linear_cov100_t3, file = "ridge_linear_cov100_t3.rda" )
We repeat the same simulation with 5 degrees of freedom (code omitted); the only required changes are sketched below.
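A minimal sketch of those changes (everything else in the simulation loop is identical, with the _t3 suffix replaced by _t5 in the stored objects and file names):
df <- 5  # degrees of freedom for the multivariate t covariates and the t errors
# e.g. cov.efcp_linear_fm_t5 <- len.efcp_linear_fm_t5 <- matrix(0, nrep, l), and so on;
# the results are saved as ridge_linear_cov100_t5.rda and ridge_linear_len100_t5.rda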
data(ridge_linear_cov100_t3,package = "ConformalSmallest")
data(ridge_linear_len100_t3,package = "ConformalSmallest")
dim=ridge_linear_cov100_t3[[1]]
cov.param_linear_fm_t3=ridge_linear_cov100_t3[[2]]
cov.naive_linear_fm_t3=ridge_linear_cov100_t3[[3]]
cov.vfcp_linear_fm_t3=ridge_linear_cov100_t3[[4]]
cov.star_linear_fm_t3=ridge_linear_cov100_t3[[5]]
cov.cv5_linear_fm_t3=ridge_linear_cov100_t3[[6]]
cov.efcp_linear_fm_t3=ridge_linear_cov100_t3[[7]]
len.param_linear_fm_t3=ridge_linear_len100_t3[[1]]
len.naive_linear_fm_t3=ridge_linear_len100_t3[[2]]
len.vfcp_linear_fm_t3=ridge_linear_len100_t3[[3]]
len.star_linear_fm_t3=ridge_linear_len100_t3[[4]]
len.cv5_linear_fm_t3=ridge_linear_len100_t3[[5]]
len.efcp_linear_fm_t3=ridge_linear_len100_t3[[6]]
len.param = len.param_linear_fm_t3/len.vfcp_linear_fm_t3
len.naive = len.naive_linear_fm_t3/len.vfcp_linear_fm_t3
len.star = len.star_linear_fm_t3/len.vfcp_linear_fm_t3
len.cv5 = len.cv5_linear_fm_t3/len.vfcp_linear_fm_t3
len.efcp = len.efcp_linear_fm_t3/len.vfcp_linear_fm_t3
len.vfcp = len.vfcp_linear_fm_t3/len.vfcp_linear_fm_t3
df.cov3=data.frame(dim,apply(cov.param_linear_fm_t3,2,mean),apply(cov.naive_linear_fm_t3,2,mean),apply(cov.vfcp_linear_fm_t3,2,mean),apply(cov.star_linear_fm_t3,2,mean),apply(cov.cv5_linear_fm_t3,2,mean), apply(cov.efcp_linear_fm_t3,2,mean))
df.cov3_sd=data.frame(dim,apply(cov.param_linear_fm_t3,2,sd),apply(cov.naive_linear_fm_t3,2,sd),apply(cov.vfcp_linear_fm_t3,2,sd),apply(cov.star_linear_fm_t3,2,sd),apply(cov.cv5_linear_fm_t3,2,sd), apply(cov.efcp_linear_fm_t3,2,sd))/sqrt(nrow(len.param))
df.len3=data.frame(dim,apply(len.param,2,mean),apply(len.naive,2,mean),apply(len.vfcp,2,mean),apply(len.star,2,mean),apply(len.cv5,2,mean), apply(len.efcp,2,mean))
df.len3_sd=data.frame(dim,apply(len.param,2,sd),apply(len.naive,2,sd),apply(len.vfcp,2,sd),apply(len.star,2,sd),apply(len.cv5,2,sd), apply(len.efcp,2,sd))/sqrt(nrow(len.param))
data(ridge_linear_cov100_t5,package = "ConformalSmallest")
data(ridge_linear_len100_t5,package = "ConformalSmallest")
dim=ridge_linear_cov100_t5[[1]]
cov.param_linear_fm_t5=ridge_linear_cov100_t5[[2]]
cov.naive_linear_fm_t5=ridge_linear_cov100_t5[[3]]
cov.vfcp_linear_fm_t5=ridge_linear_cov100_t5[[4]]
cov.star_linear_fm_t5=ridge_linear_cov100_t5[[5]]
cov.cv5_linear_fm_t5=ridge_linear_cov100_t5[[6]]
cov.efcp_linear_fm_t5=ridge_linear_cov100_t5[[7]]
len.param_linear_fm_t5=ridge_linear_len100_t5[[1]]
len.naive_linear_fm_t5=ridge_linear_len100_t5[[2]]
len.vfcp_linear_fm_t5=ridge_linear_len100_t5[[3]]
len.star_linear_fm_t5=ridge_linear_len100_t5[[4]]
len.cv5_linear_fm_t5=ridge_linear_len100_t5[[5]]
len.efcp_linear_fm_t5=ridge_linear_len100_t5[[6]]
len.param = len.param_linear_fm_t5/len.vfcp_linear_fm_t5
len.naive = len.naive_linear_fm_t5/len.vfcp_linear_fm_t5
len.star = len.star_linear_fm_t5/len.vfcp_linear_fm_t5
len.cv5 = len.cv5_linear_fm_t5/len.vfcp_linear_fm_t5
len.efcp = len.efcp_linear_fm_t5/len.vfcp_linear_fm_t5
len.vfcp = len.vfcp_linear_fm_t5/len.vfcp_linear_fm_t5
df.cov5=data.frame(dim,apply(cov.param_linear_fm_t5,2,mean),apply(cov.naive_linear_fm_t5,2,mean),apply(cov.vfcp_linear_fm_t5,2,mean),apply(cov.star_linear_fm_t5,2,mean),apply(cov.cv5_linear_fm_t5,2,mean), apply(cov.efcp_linear_fm_t5,2,mean))
df.cov5_sd=data.frame(dim,apply(cov.param_linear_fm_t5,2,sd),apply(cov.naive_linear_fm_t5,2,sd),apply(cov.vfcp_linear_fm_t5,2,sd),apply(cov.star_linear_fm_t5,2,sd),apply(cov.cv5_linear_fm_t5,2,sd), apply(cov.efcp_linear_fm_t5,2,sd))/sqrt(nrow(len.param))
df.len5=data.frame(dim,apply(len.param,2,mean),apply(len.naive,2,mean),apply(len.vfcp,2,mean),apply(len.star,2,mean),apply(len.cv5,2,mean), apply(len.efcp,2,mean))
df.len5_sd=data.frame(dim,apply(len.param,2,sd),apply(len.naive,2,sd),apply(len.vfcp,2,sd),apply(len.star,2,sd),apply(len.cv5,2,sd), apply(len.efcp,2,sd))/sqrt(nrow(len.param))
library(ggplot2)  # needed for theme_get() and the plots below
bgnd <- theme_get()$panel.background$fill
names=c("Linear","Naive","VFCP","CV*","CV-5-fold","EFCP")
#colors_manual=c("blue","#FF9933","#999000","66FFCC","#66CC99","#9999FF","#FF00CC")
colors_manual=c("red","slategrey","darkorchid3","dodgerblue4","turquoise3","grey23")
shape_manual=c(0,1,2,3,4,5)
seq=seq(2,60,by=2)
col_names=c("dim","cov.param","cov.naive","cov.vfcp","cov.star","cov.cv5","cov.efcp")
names(df.cov3)=names(df.cov5)=names(df.cov3_sd)=names(df.cov5_sd)=col_names
col_names=c("dim","len.param","len.naive","len.vfcp","len.star","len.cv5","len.efcp")
names(df.len3)=names(df.len5)=names(df.len3_sd)=names(df.len5_sd)=col_names
df.cov = rbind(df.cov3[seq,], df.cov5[seq,])
df.cov_sd = rbind(df.cov3_sd[seq,], df.cov5_sd[seq,])
df.len = rbind(df.len3[seq,], df.len5[seq,])
df.len_sd = rbind(df.len3_sd[seq,], df.len5_sd[seq,])
Moment = c("3rd moment", "5th moment")
df_names = expand.grid(dim[seq],Moment,names)  # use the actual dimensions (not row indices) on the x-axis
cov_vec = as.vector(as.matrix(df.cov[,-1]))
cov_sd_vec = as.vector(as.matrix(df.cov_sd[,-1]))
cov = cbind(df_names[,1], cov_vec, cov_sd_vec , df_names[,-1] )
len_vec = as.vector(as.matrix(df.len[,-1]))
len_sd_vec = as.vector(as.matrix(df.len_sd[,-1]))
len = cbind(df_names[,1], len_vec, len_sd_vec , df_names[,-1])
cov$Var="Coverage"
len$Var = "Width ratio"
colnames(cov) = c("V1","V2","sd","Moment","Method","Var")
colnames(len) = c("V1","V2","sd","Moment","Method","Var")
data_linear_fm=rbind(cov,len)
dummy_coverage <- data.frame(Var=c("Coverage"),Z= 0.9)
ggplot_linear_fm=ggplot(data=data_linear_fm, aes(x=V1, y=V2, group=Method, color=Method)) +
  geom_errorbar(aes(ymin=V2-sd, ymax=V2+sd), width=.1) +
  geom_line(linewidth = 0.8, aes(color=Method)) + #,linetype=Method)) +
  #geom_point(size=5, colour=bgnd)+
  #geom_point(aes(shape=Method,color=Method)) +
  theme(#axis.text.y = element_blank(),
        #axis.ticks.y = element_blank(),
        axis.title.y = element_blank(),
        #plot.title = element_text(size = 8,hjust=0.5),
        #axis.text = element_text(size = 10),
        #axis.title =element_text(size = 10),
        legend.position="top") +
  facet_grid(Var~Moment, scales = "free") +
  #scale_shape_manual(values=shape_manual) +
  scale_color_manual(values=colors_manual) +
  #ylim(0,1.2) + #xlab("dim") + ylab("Avg-Len")
  xlab("Dimension") + guides(shape="none") +
  theme(strip.text.x = element_text(size = 12, face = "bold"),
        strip.text.y = element_text(size = 12, face = "bold"),
        axis.ticks.length = unit(.25, "cm"),
        axis.title = element_text(size = 12)) +
  geom_hline(data=dummy_coverage, aes(yintercept=Z), linetype="dashed")
ggplot_linear_fm
library(repr)
library(glmnet)
#> Loading required package: Matrix
#> Loaded glmnet 4.1-8
library(MASS)
library(mvtnorm)
library(ConformalSmallest) # assumed to provide efcp_ridge(), vfcp_ridge(), star.fun(), cv.fun()
name=paste("linear_fm_t3",sep="")
set.seed(2021)
df <- 3 #degrees of freedom
l <- 1 #number of dimensions
l.lambda <- 100
lambda_seq <- seq(0,200,l=l.lambda)
dim <- 5
alpha <- 0.1
n <- 200 #number of training samples
n0 <- 100 #number of prediction points
nrep <- 100 #number of independent trials
rho <- 0.5
lambda.efcp <- lambda.vfcp <- lambda.star <- lambda.cv5 <- rep(0,nrep)
for(i in 1:nrep){
  for (r in 1:l){
    d <- dim[r]
    set.seed(i)
    Sigma <- matrix(rho, d, d)
    diag(Sigma) <- rep(1, d)
    X <- rmvt(n, Sigma, df)  # multivariate t distribution
    beta <- rep(1:5, d/5)
    eps <- rt(n, df)*(1 + sqrt(X[,1]^2 + X[,2]^2))
    Y <- X %*% beta + eps
    X0 <- rmvt(n0, Sigma, df)
    eps0 <- rt(n0, df)*(1 + sqrt(X0[,1]^2 + X0[,2]^2))
    Y0 <- X0 %*% beta + eps0
    # record the ridge penalty chosen by each method
    out.efcp <- efcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    lambda.efcp[i] <- out.efcp$lambda
    out.vfcp <- vfcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    lambda.vfcp[i] <- out.vfcp$lambda
    out.star <- star.fun(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    lambda.star[i] <- out.star$lambda
    out.cv5 <- cv.fun(X, Y, X0, lambda = lambda_seq, alpha = alpha, nfolds = 5)
    lambda.cv5[i] <- out.cv5$lambda
  }
}
#> Error in lambda.vfcp[i] <- out.vfcp$lambda: replacement has length zero
#options(repr.plot.width=18, repr.plot.height=6)
#par(mar=c(1,1,1,1))
oldpar=par(mfrow=c(2,2))
hist(lambda.cv5,breaks=seq(min(lambda.cv5)-1,max(lambda.cv5)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="CV-2splits") #make x,y, values, main larger
hist(lambda.star,breaks=seq(min(lambda.star)-1,max(lambda.star)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="CV-3splits")
hist(lambda.efcp,breaks=seq(min(lambda.efcp)-1,max(lambda.efcp)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="EFCP")
hist(lambda.vfcp,breaks=seq(min(lambda.vfcp)-1,max(lambda.vfcp)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="VFCP")
par(oldpar)
The above plot shows that all four methods mostly choose λ close to zero, which is expected: this is a low-dimensional example (d = 5) in which the underlying data-generating mechanism is linear, so little ridge regularization is needed. A quick numerical check of the same point is sketched below.
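Assuming the selection loop above ran to completion, one can tabulate how often the smallest penalty on the grid is chosen:
# fraction of replications in which lambda = 0 (the smallest grid value) is selected
mean(lambda.efcp == min(lambda_seq))
mean(lambda.cv5 == min(lambda_seq))
summary(lambda.star)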
name=paste("nonlinear_fm_t3",sep="")
set.seed(2021)
df <- 3 #degrees of freedom
l <- 1 #number of dimensions
l.lambda <- 100
lambda_seq <- seq(0,200,l=l.lambda)
dim <- 5
alpha <- 0.1
n <- 200 #number of training samples
n0 <- 100 #number of prediction points
nrep <- 100 #number of independent trials
rho <- 0.5
lambda.efcp <- lambda.vfcp <- lambda.star <- lambda.cv5 <- rep(0,nrep)
for(i in 1:nrep){
  for (r in 1:l){
    d <- dim[r]
    set.seed(i)
    Sigma <- matrix(rho, d, d)
    diag(Sigma) <- rep(1, d)
    X <- rmvt(n, Sigma, df)  # multivariate t distribution
    eps1 <- rt(n, df)*(1 + sqrt(X[,1]^2 + X[,2]^2))
    eps2 <- rt(n, df)*(1 + sqrt(X[,1]^4 + X[,2]^4))
    # nonlinear Poisson-type mean with heteroscedastic noise and rare large outliers,
    # generated in the same way for the training and test data
    Y <- rpois(n, sin(X[,1])^2 + cos(X[,2])^4 + 0.01) + 0.03*X[,1]*eps1 + 25*(runif(n,0,1) < 0.01)*eps2
    X0 <- rmvt(n0, Sigma, df)
    eps01 <- rt(n0, df)*(1 + sqrt(X0[,1]^2 + X0[,2]^2))
    eps02 <- rt(n0, df)*(1 + sqrt(X0[,1]^4 + X0[,2]^4))
    Y0 <- rpois(n0, sin(X0[,1])^2 + cos(X0[,2])^4 + 0.01) + 0.03*X0[,1]*eps01 + 25*(runif(n0,0,1) < 0.01)*eps02
    # record the ridge penalty chosen by each method
    out.efcp <- efcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    lambda.efcp[i] <- out.efcp$lambda
    out.vfcp <- vfcp_ridge(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    lambda.vfcp[i] <- out.vfcp$lambda
    out.star <- star.fun(X, Y, X0, lambda = lambda_seq, alpha = alpha)
    lambda.star[i] <- out.star$lambda
    out.cv5 <- cv.fun(X, Y, X0, lambda = lambda_seq, alpha = alpha, nfolds = 5)
    lambda.cv5[i] <- out.cv5$lambda
  }
}
#> Error in lambda.vfcp[i] <- out.vfcp$lambda: replacement has length zero
#options(repr.plot.width=18, repr.plot.height=6)
oldpar=par(mfrow=c(2,2))
#par(mar=c(1,1,1,1))
hist(lambda.cv5,breaks=seq(min(lambda.cv5)-1,max(lambda.cv5)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="CV-2splits") #make x,y, values, main larger
hist(lambda.star,breaks=seq(min(lambda.star)-1,max(lambda.star)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="CV-3splits")
hist(lambda.efcp,breaks=seq(min(lambda.efcp)-1,max(lambda.efcp)+2, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="EFCP")
hist(lambda.vfcp,breaks=seq(min(lambda.vfcp)-1,max(lambda.vfcp)+1, by = 2),xlab=expression(lambda), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5,main="VFCP")
par(oldpar)
The above plots show that all four methods mostly choose the penalty parameter λ to be as large as possible, which is also expected because the underlying data-generating mechanism is nonlinear. The analogous numerical check is sketched below.
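As before (assuming the selection loop ran to completion), this can be verified numerically:
# fraction of replications in which the largest penalty on the grid is selected
mean(lambda.efcp == max(lambda_seq))
mean(lambda.cv5 == max(lambda_seq))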