% Monte Carlo study of a projection-based ("de-biased") estimator of individual
% regression coefficients in a high-dimensional linear model (p >> n), with
% per-coefficient normal-approximation confidence intervals.
% Stage 1 precomputes, for each coefficient v, the projection direction
% qOpt solving (I + X_{-v} X_{-v}') qOpt = X_v, its bias contribution from the
% signal columns, and the estimator variance. Stage 2 repeats R simulated
% responses Y = X*beta + noise, forms the estimate b = (qOpt'Y)/(qOpt'X_v) and
% a nominal 95% interval, and records whether the true beta(v) is covered.

R=1000; % number of Monte Carlo replications
p=2450; % number of potential explanatory variables
n=35; % sample size
s=5; % number of signal variables
tau=1; % error variance
cv=1.96; % for nominal 95% coverage
c=0.9; % correlation between columns of X
vMax=1000; % how many coefficients to calculate probabilities for (can set equal to p).

% Equicorrelated p x p covariance: unit diagonal, off-diagonal entries c.
Sigma=c.*ones(p,p)+(1-c).*eye(p);
% Draw the n x p design once and column-centre it (each column has mean 0).
initX=mvnrnd(zeros(1,p),Sigma,n);
meanX=mean(initX,1);
X=initX-repmat(meanX,n,1);
I=eye(n);
qOptArray=zeros(n,1,vMax); % for the purpose of the simulation it is helpful to store the optimum q. In practice this is not needed.
varBetaHat=zeros(vMax,1); % per-coefficient variance of the estimator, reused in every replication
vBias=zeros(vMax,1); % stores value of the bias at the optimum for each coeff

% ---- Stage 1: per-coefficient projection direction, bias and variance ----
for v=1:vMax
    if mod(v,10)==0
        v % outputs progress after every 10 MC replicates
    end
    Xv=X(:,v);
    Xnotv=X;
    Xnotv(:,v)=[]; % design with the v-th column removed
    M=Xnotv*Xnotv'; % n x n Gram matrix of the remaining columns
    % qOpt solves (I + M) qOpt = Xv; backslash avoids forming inv(I+M).
    qOpt=(I+M)\Xv;
    scale=qOpt'*Xv;
    toAdd=0;
    for j=1:s % only signal variables (positions 1-5) contribute to the bias
        if j~=v % if the vth variable is signal, omit this
            Xj=X(:,j);
            toAdd=toAdd+(qOpt'*Xj);
        end
    end
    vBias(v,1)=toAdd/scale;
    qOptArray(:,:,v)=qOpt;
    variance=tau*(qOpt'*qOpt)/((qOpt'*Xv)^2); % in practice tau needs to be estimated. We did this as in Appendix B. There are other options.
    varBetaHat(v,1)=variance;
end

beta=[ones(s,1);zeros(p-s,1)]; % sparse vector of true betas with the signal variables in positions 1-5.
betaHat=zeros(R,vMax);
betaUpper=zeros(R,vMax);
betaLower=zeros(R,vMax);
isInSet=zeros(R,vMax); % 1 if the interval for coeff v covers the truth in replication r

% ---- Stage 2: Monte Carlo replications of Y, estimates and intervals ----
for r=1:R
    if mod(r,10)==0
        r % progress indicator, printed every 10 replications
    end
    % NOTE(review): tau is documented above as the error VARIANCE, but here it
    % multiplies randn directly (i.e. acts as a standard deviation), while the
    % variance formula in Stage 1 uses tau as a variance. The two agree only
    % because tau=1 — confirm scaling (tau vs sqrt(tau)) before changing tau.
    epsilon=tau.*randn(n,1);
    Y=X*beta+epsilon;
    for v=1:vMax
        qOpt=squeeze(qOptArray(:,:,v)); % recover the precomputed n x 1 direction
        Xv=X(:,v);
        b=(qOpt'*Y)/(qOpt'*Xv); % point estimate of beta(v)
        betaHat(r,v)=b;
        variance=varBetaHat(v,1);
        upperlimit=b+cv*sqrt(variance);
        lowerlimit=b-cv*sqrt(variance);
        betaUpper(r,v)=upperlimit;
        betaLower(r,v)=lowerlimit;
        % NOTE(review): the visible file ends mid-expression on the next line;
        % the comparison against upperlimit, the closing `end`s and any
        % coverage summary lie beyond this excerpt — recover from the original.
        isInSet(r,v)=(beta(v,1)>lowerlimit).*(beta(v,1)