************************************************************ //Note: For E[F]=10, use f0=3 // For E[F]=20, use f0=4.358898944 //JRM, Program date 3/8/2022 //E-mail justin.mccrary@gmail.com with any bugs ************************************************************ ************************************************************ **********************Outline******************************* ************************************************************ ** This program: ***** (1) Numerically integrates to get the weak IV ***** theoretical prediction. ***** (2) Produces 10,000 Monte Carlo draws of sample ***** size 1000 to get 10,000 t-ratios. ***** (3) Calculates the rejection probability. ***** (4) Calculates the conditional expected length of ***** Aderson Rubin CIs and tF CIs. Plots these ***** for the 10,000 Monte Carlo draws ***** (5) Plots the histogram of the 10,000 t-ratios from ***** the Monte Carlo draws, the Weak IV Asymptotic ***** Theory, and the standard normal. Shows that the ***** histogram does not follow the standar normal density ***** and does closely match the theoretical density. ***************************************************************** ***************************************************************** capture log close log using weakivdensity, text replace set type double clear all //Default parameters correspond to rho=0.8 and E[F]=20 args rho f0 if ("`rho'"=="") local rho=0.8 if ("`f0'"=="") local f0=3 local abs_rho=abs(`rho') assert `abs_rho'<=1 //-1 <= rho <= 1 *************************************************************** ********* PART 1: WEAK IV THEORETICAL CALCULATIONS ************ *************************************************************** mata: //************************************************************* //************************************************************* //**** SETTING UP WEAK IV THEORETICAL CALCULATIONS ************ //************************************************************* //************************************************************* //Note: As we show in the paper, to implement our calculations //We have to numerically evaluate a series of integrals. //These next few programs define the integrands neeed. All of //the integrands are proportional to the normal density. //We will evaluate them with Gaussian quadrature using -Quadrature- //Piece 1 real scalar function piece1(real scalar x, real scalar rho, real scalar f0, real scalar c) { rplus =(-rho*c^2*x+sqrt(rho^2*c^4*x^2+c^2*x^2*(x^2-c^2)))/(x^2-c^2) rminus=(-rho*c^2*x-sqrt(rho^2*c^4*x^2+c^2*x^2*(x^2-c^2)))/(x^2-c^2) y1=( 0 - rho*(x-f0) ) / sqrt(1-rho^2) y2=( rminus - rho*(x-f0) ) / sqrt(1-rho^2) y3=( rplus - rho*(x-f0) ) / sqrt(1-rho^2) if (c>0) { return( (normal(y1)-normal(y2)) * normalden(x-f0) ) } else { return( (normal(y3)-normal(y1)) * normalden(x-f0) ) } } //Piece 2 real scalar function piece2(real scalar x, real scalar rho, real scalar f0, real scalar c) { rplus =(-rho*c^2*x+sqrt(rho^2*c^4*x^2+c^2*x^2*(x^2-c^2)))/(x^2-c^2) rminus=(-rho*c^2*x-sqrt(rho^2*c^4*x^2+c^2*x^2*(x^2-c^2)))/(x^2-c^2) y1=( 0 - rho*(x-f0) ) / sqrt(1-rho^2) y2=( rminus - rho*(x-f0) ) / sqrt(1-rho^2) y3=( rplus - rho*(x-f0) ) / sqrt(1-rho^2) if (c>0) { return( (normal(y1) - (normal(y2)-normal(y3))) * normalden(x-f0) ) } else { return( (1-normal(y1)) * normalden(x-f0) ) } } //Piece 3a real scalar function piece3a(real scalar x, real scalar rho, real scalar f0, real scalar c) { rplus =(-rho*c^2*x+sqrt(rho^2*c^4*x^2+c^2*x^2*(x^2-c^2)))/(x^2-c^2) rminus=(-rho*c^2*x-sqrt(rho^2*c^4*x^2+c^2*x^2*(x^2-c^2)))/(x^2-c^2) y1=( 0 - rho*(x-f0) ) / sqrt(1-rho^2) if (c>0) { return( normal(y1) * normalden(x-f0) ) } else { return( (1-normal(y1)) * normalden(x-f0) ) } } //Piece 3b real scalar function piece3b(real scalar x, real scalar rho, real scalar f0, real scalar c) { rplus =(-rho*c^2*x+sqrt(rho^2*c^4*x^2+c^2*x^2*(x^2-c^2)))/(x^2-c^2) rminus=(-rho*c^2*x-sqrt(rho^2*c^4*x^2+c^2*x^2*(x^2-c^2)))/(x^2-c^2) y1=( 0 - rho*(x-f0) ) / sqrt(1-rho^2) if (c>0) { return( (1 - normal(y1)) * normalden(x-f0) ) } else { return( normal(y1) * normalden(x-f0) ) } } //Piece 4 real scalar function piece4(real scalar x, real scalar rho, real scalar f0, real scalar c) { rplus =(-rho*c^2*x+sqrt(rho^2*c^4*x^2+c^2*x^2*(x^2-c^2)))/(x^2-c^2) rminus=(-rho*c^2*x-sqrt(rho^2*c^4*x^2+c^2*x^2*(x^2-c^2)))/(x^2-c^2) y1=( 0 - rho*(x-f0) ) / sqrt(1-rho^2) y2=( rminus - rho*(x-f0) ) / sqrt(1-rho^2) y3=( rplus - rho*(x-f0) ) / sqrt(1-rho^2) if (c>0) { return( (1-normal(y1)-(normal(y2)-normal(y3))) * normalden(x-f0) ) } else { return( normal(y1) * normalden(x-f0) ) } } //Piece 5 real scalar function piece5(real scalar x, real scalar rho, real scalar f0, real scalar c) { rplus =(-rho*c^2*x+sqrt(rho^2*c^4*x^2+c^2*x^2*(x^2-c^2)))/(x^2-c^2) rminus=(-rho*c^2*x-sqrt(rho^2*c^4*x^2+c^2*x^2*(x^2-c^2)))/(x^2-c^2) y1=( 0 - rho*(x-f0) ) / sqrt(1-rho^2) y2=( rminus - rho*(x-f0) ) / sqrt(1-rho^2) y3=( rplus - rho*(x-f0) ) / sqrt(1-rho^2) if (c>0) { return( (normal(y3)-normal(y1)) * normalden(x-f0) ) } else { return( (normal(y1)-normal(y2)) * normalden(x-f0) ) } } //Piece 6a real scalar function piece6a(real scalar x, real scalar rho, real scalar f0) { y1=( 0 - rho*(x-f0) ) / sqrt(1-rho^2) return( (1-normal(y1)) * normalden(x-f0) ) } //Piece 6b real scalar function piece6b(real scalar x, real scalar rho, real scalar f0) { y1=( 0 - rho*(x-f0) ) / sqrt(1-rho^2) return( normal(y1) * normalden(x-f0) ) } //************************************************** //** The next batch of programs are to setup ******* //** the actual integration. I am sure there ******* //** is a smarter way to structure these next ****** //** programs... lots of ugly redundancies ********* //** Although it is not pretty, it does work! ****** //************************************************** real scalar function intpiece1(real scalar rho, real scalar f0, real scalar c, real scalar a, real scalar b) { class Quadrature scalar q q=Quadrature() q.setEvaluator(&piece1()) q.setArgument(1, rho) //parameter from user q.setArgument(2, f0) //parameter from user q.setArgument(3, c) //what we loop over q.setLimits( (a,b) ) q.setAbstol(1e-12) //more stringent than the default... might actually be too stringent q.setReltol(1e-10) return(q.integrate()) } real scalar function intpiece2(real scalar rho, real scalar f0, real scalar c, real scalar a, real scalar b) { class Quadrature scalar q q=Quadrature() q.setEvaluator(&piece2()) q.setArgument(1, rho) //parameter from user q.setArgument(2, f0) //parameter from user q.setArgument(3, c) //what we loop over q.setLimits( (a,b) ) q.setAbstol(1e-12) q.setReltol(1e-10) return(q.integrate()) } real scalar function intpiece3a(real scalar rho, real scalar f0, real scalar c, real scalar a, real scalar b) { class Quadrature scalar q q=Quadrature() q.setEvaluator(&piece3a()) q.setArgument(1, rho) //parameter from user q.setArgument(2, f0) //parameter from user q.setArgument(3, c) //what we loop over q.setLimits( (a,b) ) q.setAbstol(1e-12) q.setReltol(1e-10) return(q.integrate()) } real scalar function intpiece3b(real scalar rho, real scalar f0, real scalar c, real scalar a, real scalar b) { class Quadrature scalar q q=Quadrature() q.setEvaluator(&piece3b()) q.setArgument(1, rho) //parameter from user q.setArgument(2, f0) //parameter from user q.setArgument(3, c) //what we loop over q.setLimits( (a,b) ) q.setAbstol(1e-12) q.setReltol(1e-10) return(q.integrate()) } real scalar function intpiece4(real scalar rho, real scalar f0, real scalar c, real scalar a, real scalar b) { class Quadrature scalar q q=Quadrature() q.setEvaluator(&piece4()) q.setArgument(1, rho) //parameter from user q.setArgument(2, f0) //parameter from user q.setArgument(3, c) //what we loop over q.setLimits( (a,b) ) q.setAbstol(1e-12) q.setReltol(1e-10) return(q.integrate()) } real scalar function intpiece5(real scalar rho, real scalar f0, real scalar c, real scalar a, real scalar b) { class Quadrature scalar q q=Quadrature() q.setEvaluator(&piece5()) q.setArgument(1, rho) //parameter from user q.setArgument(2, f0) //parameter from user q.setArgument(3, c) //what we loop over q.setLimits( (a,b) ) q.setAbstol(1e-12) q.setReltol(1e-10) return(q.integrate()) } real scalar function intpiece6a(real scalar rho, real scalar f0, real scalar a, real scalar b) { class Quadrature scalar q q=Quadrature() q.setEvaluator(&piece6a()) q.setArgument(1, rho) //parameter from user q.setArgument(2, f0) //parameter from user q.setLimits( (a,b) ) q.setAbstol(1e-12) q.setReltol(1e-10) return(q.integrate()) } real scalar function intpiece6b(real scalar rho, real scalar f0, real scalar a, real scalar b) { class Quadrature scalar q q=Quadrature() q.setEvaluator(&piece6b()) q.setArgument(1, rho) //parameter from user q.setArgument(2, f0) //parameter from user q.setLimits( (a,b) ) q.setAbstol(1e-12) q.setReltol(1e-10) return(q.integrate()) } //************************************************** //************************************************** //** PAY ATTENTION TO THE LIMITS OF INTEGRATION **** //************************************************** //************************************************** //Program to compute the cdf of t real scalar function t_cdf(real scalar rho, real scalar f0, real scalar c, real scalar int6) { real scalar int1, int2, int3a, int3b, int4, int5 real scalar infty, minfty, limit1, limit2, limit3, limit4 real scalar debug infty=10 //This appears to be subtle and should be improved upon in the next version //If you want good performance for larger values of f0, it needs to be 10ish //But if you tinker with really large values of rho (above 0.99), it needs to be 5ish minfty=-infty limit1=-sqrt(c^2) limit2=-sqrt(c^2)*sqrt(1-rho^2) limit3= sqrt(c^2)*sqrt(1-rho^2) limit4= sqrt(c^2) int1 = intpiece1 (rho,f0,c, minfty, limit1) int2 = intpiece2 (rho,f0,c, limit1, limit2) int3a= intpiece3a(rho,f0,c, limit2, 0) int3b= intpiece3b(rho,f0,c, 0 , limit3) int4 = intpiece4 (rho,f0,c, limit3, limit4) int5 = intpiece5 (rho,f0,c, limit4, infty) if (c>0) { result=int1+int2+int3a+int3b+int4+int5+int6 } else { //Note the max operator. Bc of the subtraction, this can have numerical problems and become a tiny negative number (e.g., -1e-12) result= max(-(int1+int2+int3a+int3b+int4+int5)+int6\0) } debug=0 //This is pretty mission critical if something is buggy numerically if (debug>0) { int1\int2\int3a\int3b\int4\int5\int6\result } return(result) } //Program to compute the pdf of t using Richardson extrapolation real matrix function t_pdf(real scalar rho, real scalar f0, real scalar N) { //Note: I draw on Shiraishi, Furuta, Ishimatsu, and Akhter (2007) //Mathematical Biosceinces vol 208 pp. 590-606 //They give a lucid description of Richardson extrapolation //In the end, for this problem, central differences do just fine (N=1) real scalar grid, LB, UB, h, G, g real scalar i, eps, num real colvector Y real colvector cvec, result infty=10 //Note the comment above: this appears to be important minfty=-infty int6a= intpiece6a(rho,f0, minfty, 0) int6b= intpiece6b(rho,f0, 0 , infty) int6=int6a+int6b //this is an argument for t_cdf, because pieces 6a and 6b do not depend on c grid=0.01 //This is the tightness of the evaluation sequence LB=-4 UB=4 assert(N<8) //authors only extend Richardson approach to N=7 Y=J(N,1,.) //used to calculate the numerator of the first derivative approximation eps=0.01 //explicit recommendation of authors G=1+(UB-LB)/grid cvec=((1::G):-1):/(G-1)*(UB-LB):+LB result=J(G,1,.) for (g=1; g<=G; g++) { h=max(eps*cvec[g]\1e-6) //do not level the h go below 1e-6... useful if cvec[g]=0, as it can be for (i=1; i<=N; i++) { Y[i]=t_cdf(rho,f0,cvec[g]+i*h,int6)-t_cdf(rho,f0,cvec[g]-i*h,int6) } if (N==1) { num=1*Y[1] } else if (N==2) { num=(4/3)*Y[1]+(-1/6)*Y[2] } else if (N==3) { num=(3/2)*Y[1]+(-3/10)*Y[2]+(1/30)*Y[3] } else if (N==4) { num=(8/5)*Y[1]+(-2/5)*Y[2]+(8/105)*Y[3]+(-1/140)*Y[4] } else if (N==5) { num=(5/3)*Y[1]+(-10/21)*Y[2]+(5/42)*Y[3]+(-5/252)*Y[4]+(1/630)*Y[5] } else if (N==6) { num=(12/7)*Y[1]+(-15/28)*Y[2]+(10/63)*Y[3]+(-1/28)*Y[4]+(2/385)*Y[5]+(-1/2772)*Y[6] } else if (N==7) { num=(7/4)*Y[1]+(-7/12)*Y[2]+(7/36)*Y[3]+(-7/132)*Y[4]+(7/660)*Y[5]+(-7/5148)*Y[6]+(1/2012)*Y[7] } result[g]=num/(2*h) } return(cvec,result) } //Time to exit MATA finally end //*************************************** //*************************************** //*** You have finally arrived at... **** //** WEAK IV THEORETICAL PREDICTIONS! *** //*************************************** //*************************************** // Note: the numerical integration can only handle // -0.99 <= rho <= 0.99. if `abs_rho' <= 0.99 { mata: f0=`f0' mata: rho=`abs_rho' // Must give the integration piece a positive rho // will flip the results for negative rho. local N=1 //If you want to tinker with Richardson extrapolation, increase N //Program tolerates N=1,2,...,7. N=4 is said to be optimal. But it is slow set rmsg on mata: tmp=t_pdf(rho,f0,`N') set rmsg off getmata (c t_pdf)=tmp label var c "Evaluation sequence" if (`rho' < 0) quietly replace c=-1*c label var t_pdf "Weak IV Theoretical Prediction" gen row=_n saveold weakivdensity, replace version(12) } ****************************************************** **** PART 2: MONTE CARLO DRAWS *********************** ****************************************************** //*************************************** //*************************************** //* TAKING THE THEORY TO THE EVIDENCE *** //*************************************** //*************************************** local n=1000 //sample size local R=10000 //number of replications local EF=1+(`f0')^2 //This is for the stub to the chart local pi=(`f0')/sqrt(`n') //Argument of MC() //******************** //* Monte Carlo DGP ** //******************** //Taken from Mostly Harmless Econometrics capture program drop MC program define MC, rclass { syntax [, n(integer 1000) rho(real 0.8) pi(real 0.04472136)] drop _all //NOTE: This drops the data, but not the locals/globals (pi,rho,n) set obs `n' gen v=rnormal() //v, Z, and epsilon are all N(0,1) gen Z=rnormal() gen epsilon=rnormal() //epsilon is used to construct u gen u = `rho'*v + sqrt(1-(`rho')^2)*epsilon //Var(u)=1 gen X = `pi'*Z + v gen Y = X + u ivreg2 Y (X=Z), robust //run IV predict uhat, resid // residual used for calculating AR cond. exp. length local bhat=_b[X] //store IV coefficient local se_bhat=_se[X] //store IV standard error (Wald) local t=(`bhat'-1)/`se_bhat' //store t local tLB=_b[X]-invnorm(0.975)*_se[X] //...and edges of Wald confidence interval local tUB=_b[X]+invnorm(0.975)*_se[X] reg X Z, robust predict vhat, resid // residual used for calculating AR cond. exp. length local F = (_b[Z]/_se[Z])^2 // first-stage F statistic! gen Zuhat = Z*uhat gen Zvhat = Z*vhat correlate Zuhat Zvhat local rho_tilde_hat = r(rho) // used for calculating AR cond. exp. length // tf stata function tf Y (X=Z), robust local tfLB = e(tF_LB_05) local tfUB = e(tF_UB_05) local tf_critical_value = e(tF_critical_value_05) local F = e(F) //What follows is Stata gobbledy-gook for getting the locals passed to the -simulate- command return scalar bhat=`bhat' return scalar se_bhat=`se_bhat' return scalar t=`t' return scalar tUB=`tUB' return scalar tLB=`tLB' return scalar F = `F' return scalar rho_tilde_hat=`rho_tilde_hat' return scalar tfLB = `tfLB' return scalar tfUB = `tfUB' return scalar tf_critical_value = `tf_critical_value' ereturn clear } end //************************************** //*** Run the simulation *************** //************************************** set seed 123456789 set rmsg on simulate, reps(`R'): MC, n(`n') rho(`rho') pi(`pi') set rmsg off if `abs_rho' <= 0.99 { gen row=_n merge 1:1 row using weakivdensity saveold weakivdensity, replace version(12) } else { gen row=_n saveold weakivdensity, replace version(12) } ************************************************* *** PART 3: CALCULATE REJECTION PROBABILITIES *** ************************************************* use weakivdensity.dta, clear gen reject_tF = (t^2) > (tf_critical_value^2) replace reject_tF = 0 if F <= (invnorm(0.975))^2 replace reject_tF = 0 if tf_critical_value==. display "tF rejection rate" sum reject_tF // this outputs the rejection probability gen reject_standard = (t^2) > (1.96^2) display "standard rejection rate" sum reject_standard sort row // must re-sort by row for upcoming graphs saveold weakivdensity.dta, replace ********************************************************* *** PART 4: CONDITIONAL EXPECTED LENGTH FOR TF AND AR *** ********************************************************* use weakivdensity.dta, clear // create "samples" of 100 each gen sample = int(_n/100) gen reps = 1 // Length of tF interval. gen tF_length = abs(tfUB - tfLB) // Length of AR interval. local q = (invnorm(0.975))^2 gen m_arL = bhat + ((-(`q'*rho_tilde_hat/sqrt(F)) - sqrt(`q')* sqrt(1 - ((`q'*(1 - rho_tilde_hat^2))/F)))/(1 - (`q'/F)))*se_bhat gen m_arR = bhat + ((-(`q'*rho_tilde_hat/sqrt(F)) + sqrt(`q')* sqrt(1 - ((`q'*(1 - rho_tilde_hat^2))/F)))/(1 - (`q'/F)))*se_bhat gen ar_length = abs(m_arR - m_arL) // Prepare for graphing quietly summarize sample local max_sample= r(max) forvalues s=0/`max_sample' { preserve quietly keep if sample<=`s' // only keep if F>3.84 quietly drop if F<=(invnorm(0.975))^2 // collapse collapse (mean) tF_length ar_length (min) F (sum) reps tempfile new_conditional_`s' quietly save `new_conditional_`s'' restore } clear forvalues s=0/`max_sample' { append using `new_conditional_`s'' } sort reps save "tF_AR_expLength.dta", replace // Graph the tF and AR conditional expected lengths use "tF_AR_expLength.dta", clear set scheme s2mono graph twoway connected /// ar_length tF_length reps, /// lpattern(solid shortdash) /// msymbol(none none) /// graphregion(color(gs16) fcolor(gs16)) /// title("{&rho} = `rho', E[F] = `EF'") /// xtitle("Accumulated Monte Carlo Draws") /// ytitle("Conditional Expected Length") /// legend(order(1 "AR Length" 2 "tF Length") /// size(small) region(lstyle(none))) /// note("Note: E[F] = f{subscript:0}{superscript:2} + 1") graph export expected_length.pdf, replace ******************************************************* **** PART 5: PLOT MONTE CARLO DRAW, WEAK IV THEORY **** **** AND STANDARD NORMAL **** ******************************************************* if `abs_rho' <= 0.99 { use weakivdensity.dta, clear sort row // make sure it's sorted by row saveold weakivdensity.dta, replace set scheme s2mono graph twoway /// (histogram t if abs(t)<4, width(0.08) color(gs10) lcolor(gs8)) /// (line t_pdf c, lcolor(black) lpattern(solid) lwidth(medthick)) /// (function y=normalden(x), range(-4 4) lcolor(black) lpattern(dash)) /// , /// title("{&rho} = `rho', E[F] = `EF'") /// ytitle("Density", margin(medium)) /// xtitle("Value of t-ratio") /// legend(order(2 "Weak IV Asymptotic Theory" /// 3 "Standard Normal" /// 1 "Monte Carlo Evidence") /// ring(0) position(2) rows(3) size(vsmall)) /// graphregion(color(white)) bgcolor(white) /// xlabel(-4 -3 -2 -1 0 1 2 3 4) /// note("Note: E[F] = f{subscript:0}{superscript:2} + 1") graph export weakivdensity_091321.pdf, replace } else { display in red "Currently, the program only calculates the Weak IV Asymptotic Theory for -0.99 < rho < 0.99." display in red "For rho outside of these bounds, the program will still generate the critical values, rejection probabilities, and conditional expected length." } log close