/*------------------------------------------------------------------------------

		Bunching at tariff steps
		Elasticity estimates
		
------------------------------------------------------------------------------*/


/*------------------------------------------------------------------------------

Steps

	1. Break into bins
	2. Estimate counterfactual
	3. Estimate full bunching equation
	4. Subtract counterfactual from full bunching equation to get bunching.
	-- Uses proportional integration constraint rule
	
	
------------------------------------------------------------------------------*/
set more off
clear all
set matsize 1000


// Move to the shared Dropbox root. Every collaborator's location is attempted
// in the order listed; attempts that fail are silently ignored.
foreach root in ///
	"C:/Users/UCTAdmin/Dropbox" ///
	"C:/Users/Grant Smith/Dropbox (GRS collaborations)" ///
	"/Users/kelsey/Dropbox" ///
	"C:/Users/JPAL/Dropbox" ///
	"/Users/Grant/Dropbox" {
	cap cd "`root'"
}


// (1) set data and replications

// Project-relative folders: $IN holds the input data, $resdir receives tables and figures.
global IN  "SA Utilities/Phase in/Mitchell's Plain/Data and analysis"
global resdir "SA Utilities/Paper/Tables-Figures"

// Sample tag: "MP" = Mitchells Plain analysis; "elast" = larger CoCT sample.
use "$IN/Data/admin_data/workingdata_analysis.dta", clear
local data "MP"

// Alternative sample (consistency check with the larger sample):
*use "$IN/Data/admin_data/elast_quartile_clean.dta", clear
*local data "elast"

// Number of bootstrap replications used for the standard errors.
local r 500

// $y = bill years to analyse, $m = values of pp (meter type) to analyse.
// For the MP sample the file is meant to be run twice by hand:
// once with 2013 / 0 and once with 2015 / 1.
if "`data'" != "MP" {
	global y "2013 2014 2015"
	global m "0 1"
}
else {
	global y "2015"
	global m "1"
}

	
// (2) define programs for block bootstrap: myboot_elast and myboot_mass

	// elasticity program
	
		cap program drop myboot_elast
			* myboot_elast: one bootstrap replication of the bunching elasticity.
			*
			* Expects in memory: freq, bin_* (polynomial terms), window (0/1),
			* id, t2 and t3. The data are preserved on entry and restored on exit.
			*
			* Steps: resample the rows in memory (bsample, no cluster or strata
			* options), fit freq on bin_* outside the window, rescale the fitted
			* values until their total is at least the total of freq, then compare
			* actual and fitted mass inside the window.
			*
			* Returns: r(elast) = mean of bunch_mass / ((t2 - t3)/t2); missing when
			* the rescaling cannot converge.
			program define myboot_elast, rclass
				preserve
				bsample

				qui reg freq bin_* if window == 0, cl(id)
				predict cfact
				label var cfact "counterfactual predicted distribution"

				* Shift the counterfactual distribution in proportion to bin size to satisfy the integration constraint

				egen totalcfact = total(cfact), mi
				egen totalactual = total(freq), mi

				sum totalcfact totalactual

				* Multiplying by factors above 1 can never lift a non-positive total
				* (or reach a missing target), so the loop below would run forever.
				* Report a missing statistic for this replication instead.
				if (totalactual[1] > totalcfact[1]) & (totalcfact[1] <= 0 | missing(totalactual[1])) {
					di as error "myboot_elast: counterfactual total is not positive; returning missing"
					return scalar elast = .
					restore
					exit
				}

				* NOTE(review): the factor grows every pass (1.01, 1.02, ...) and is
				* applied to the already-rescaled cfact, so the scaling compounds and
				* may overshoot the constraint. Kept as is because the point estimates
				* elsewhere in this file use the same rule.
				local i = 0.01
				local j = 1
				while (totalactual > totalcfact) {
					local j = `j' + `i'
					di "`j'"
					replace cfact = cfact*`j'
					drop totalcfact
					egen totalcfact = total(cfact), mi
				}

				** Get predicted mass over window

				gen pred1 = .
				replace pred1 = cfact if window == 1
				egen pred_mass = total(pred1), mi
				label var pred_mass "counterfactual mass"

				** Then get actual mass over window

				gen prag1 = .
				replace prag1 = freq if window == 1
				egen act_mass = total(prag1), mi
				label var act_mass "actual mass"

				** Now compare actual mass to predicted mass

				gen  bunch_mass = (act_mass-pred_mass)/pred_mass
				label var bunch_mass "bunching mass"

				** Elasticity calculation

				gen elasticity = bunch_mass/((t2-t3)/t2) // price denominator is -(t3-t2)/t2
				sum elasticity
				return scalar elast = r(mean)

				restore
			end
			
		// mass program
		
			cap program drop myboot_mass

			* myboot_mass: one bootstrap replication of the excess (bunching) mass.
			*
			* Expects in memory: freq, bin_* (polynomial terms), window (0/1) and
			* id. The data are preserved on entry and restored on exit.
			*
			* Steps: resample the rows in memory (bsample, no cluster or strata
			* options), fit freq on bin_* outside the window, rescale the fitted
			* values until their total is at least the total of freq, then compare
			* actual and fitted mass inside the window.
			*
			* Returns: r(bmass) = mean of (act_mass - pred_mass)/pred_mass; missing
			* when the rescaling cannot converge.
			program define myboot_mass, rclass
				preserve
				bsample

				qui reg freq bin_* if window == 0, cl(id)
				predict cfact
				label var cfact "counterfactual predicted distribution"

				* Shift the counterfactual distribution in proportion to bin size to satisfy the integration constraint

				egen totalcfact = total(cfact), mi
				egen totalactual = total(freq), mi

				sum totalcfact totalactual

				* Multiplying by factors above 1 can never lift a non-positive total
				* (or reach a missing target), so the loop below would run forever.
				* Report a missing statistic for this replication instead.
				if (totalactual[1] > totalcfact[1]) & (totalcfact[1] <= 0 | missing(totalactual[1])) {
					di as error "myboot_mass: counterfactual total is not positive; returning missing"
					return scalar bmass = .
					restore
					exit
				}

				* NOTE(review): the factor grows every pass (1.01, 1.02, ...) and is
				* applied to the already-rescaled cfact, so the scaling compounds and
				* may overshoot the constraint. Kept as is because the point estimates
				* elsewhere in this file use the same rule.
				local i = 0.01
				local j = 1
				while (totalactual > totalcfact) {
					local j = `j' + `i'
					di "`j'"
					replace cfact = cfact*`j'
					drop totalcfact
					egen totalcfact = total(cfact), mi
				}

				** Get predicted mass over window

				gen pred1 = .
				replace pred1 = cfact if window == 1
				egen pred_mass = total(pred1), mi
				label var pred_mass "counterfactual mass"

				** Then get actual mass over window

				gen prag1 = .
				replace prag1 = freq if window == 1
				egen act_mass = total(prag1), mi
				label var act_mass "actual mass"

				** Now compare actual mass to predicted mass

				gen  bunch_mass = (act_mass-pred_mass)/pred_mass
				label var bunch_mass "bunching mass"
				sum bunch_mass
				return scalar bmass = r(mean)

				restore
			end
			
// (3) additional data set up
			
	// Sample restrictions. The first three are wrapped in capture so the file
	// still runs when those variables are absent from the loaded data set.
	cap drop if switchmonthD == 1
	cap drop if switcher == 0
	cap drop group
	keep if tariff == 1 // for now, restrict to lifeline tariff in bill years with step at 350 kwh
	keep if bill_year > 2012

	// Trim consumption. The 99th percentile is copied into a local right away:
	// r() results do not reliably survive the drop commands that follow, and
	// the same value is needed again for $maxcons.
	sum mcons_no0, d
	local p99 = r(p99)
	drop if mcons_no0 > `p99' // trim very high outliers
	drop if mcons_no0 < 0 // negative consumption
	global maxcons = `p99' // upper end of the bin grid built later

	// Marginal prices on either side of the 350 kWh step, by bill year
	// (t2 = price below the step, t3 = price above it).
	cap drop t2 t3
	gen t2 = 0.079 if bill_year == 2013
	replace t2 = 0.084 if bill_year == 2014
	replace t2 = 0.091 if bill_year == 2015
	gen t3 = 0.184 if bill_year == 2013
	replace t3 = 0.204 if bill_year == 2014
	replace t3 = 0.251 if bill_year == 2015


	// Results file. It is created (with a header row) only when $m is exactly
	// "0", i.e. on the first of the two manual runs; otherwise rows are appended.
	// NOTE(review): the header labels the last two columns "ci normal", but the
	// rows written later in this file contain the bias-corrected bounds.
	cap file close elast
	if "$m" == "0" { // fix txt file to replace in first loop only
		file open elast using "$IN/Data/admin_data/elast.txt", write text replace

		file write elast "data" _tab "n" _tab "year" _tab "meter" _tab "window" _tab "poly" _tab "stat" _tab "observed" _tab "bootstrap" ///
			_tab "se" _tab "ci normal lb" _tab "ci normal ub" _tab _n
	}
	else {
		file open elast using "$IN/Data/admin_data/elast.txt", write text append
	}


// (4) loops for robustness checks

	local bwidth = 10  // change bin width here

	// For every window width (in bins) and polynomial order: bin the data, fit
	// the counterfactual, compute excess mass and elasticity for each requested
	// year / meter type, bootstrap both statistics and append one row per
	// statistic to the open results file (handle: elast).
	foreach w in 2 3 4 5 6 {	// window is bwidth * w

		foreach poly in 5 6 7 8 9 {
			preserve

			// (5) start analysis

			** create bins
			egen bin = cut(mcons_no0), at(0(`bwidth')$maxcons)
			label var bin "consumption bin with width of `bwidth'"
			sum bin
			global num_bin = `r(max)'

			// _N within a by-group is never missing, so no zero-filling is needed.
			bys bin bill_year pp: gen freq = _N
			label var freq "count of obs in a consumption bin by meter type and year"

			** Select bunching window width
			egen btag = tag(bin pp bill_year)
			gen delta = `w'*`bwidth'
			gen zstar = 350
			// restrict sample for counterfactual to vicinity of zstar
			*keep if (bin >= (zstar * .5)) & (bin <= (zstar * 1.5))

			** Construct bunching window // focus on zstar = 350
			// The window covers bins from zstar up to zstar + delta (right of the kink only).
			gen window = ((bin >= (zstar)) & (bin <= (zstar + delta))) // (bin >= (zstar - delta))
			label var window "within delta of kink point (symmetrical)" // trying all to the right

			** set up counterfactual distribution
			forval i = 1/`poly' {
				gen bin_`i' = bin ^ `i'
			}

			keep if btag == 1 // each bin contributes to the analysis not each observation

			** Estimate counterfactual and calculate elasticities
			tempfile bins
			save `bins', replace

			foreach y in $y {
				foreach m in $m {

					use `bins', clear
					keep if bill_year == `y' & pp == `m'
					sum bin freq

					reg freq bin_* if window == 0, cl(id)
					predict cfact
					label var cfact "counterfactual predicted distribution"

					egen totalcfact = total(cfact), mi
					egen totalactual = total(freq), mi

					// Multiplying by factors above 1 can never lift a non-positive
					// total (or reach a missing target): stop instead of looping forever.
					if (totalactual[1] > totalcfact[1]) & (totalcfact[1] <= 0 | missing(totalactual[1])) {
						di as error "integration constraint cannot be met: year `y', meter `m', window `w', poly `poly'"
						exit 498
					}

					// NOTE(review): the factor grows every pass (1.01, 1.02, ...) and is
					// applied to the already-rescaled cfact, so the scaling compounds.
					// Kept unchanged so it matches the rule inside the bootstrap programs.
					local i = 0.01
					local j = 1
					while (totalactual > totalcfact) {
						local j = `j' + `i'
						di "`j'"
						replace cfact = cfact*`j'
						drop totalcfact
						egen totalcfact = total(cfact), mi
					}

					** Get predicted mass over window
					gen pred1 = .
					replace pred1 = cfact if window == 1
					egen pred_mass = total(pred1), mi
					label var pred_mass "counterfactual mass"

					** Then get actual mass over window
					gen prag1 = .
					replace prag1 = freq if window == 1
					egen act_mass = total(prag1), mi
					label var act_mass "actual mass"

					** Now compare actual mass to predicted mass
					gen  bunch_mass = (act_mass-pred_mass)/pred_mass
					label var bunch_mass "bunching mass"
					sum bunch_mass
					local observed_bmass = r(mean)

					** Elasticity calculation
					gen elasticity = bunch_mass/((t2-t3)/t2)
					sum elasticity

					local observed_elast = r(mean)
					local n = r(N)	// number of bin rows; passed to bstat below

					// Figure for one chosen specification only.
					if `poly' == 7 & `w' == 4 {
						qui binscatter cfact freq bin, ///
							line(connect) xline(350) msymbol(none none) ///
							legend(order(1 "Counterfactual" 2 "Actual")) ///
							ytitle(Observations per bin) xtitle(Monthly kWh)
						graph export "$resdir/elast_`y'_`m'.pdf", replace
					}

					sum *_mass
					sum bunch_mass

					// now execute block bootstrap: elasticity first, then bunching mass
					foreach stat in elast mass {

						if "`stat'" == "elast" {
							local rname    "elast"
							local observed `observed_elast'
							di "elasticity bootstrap"
						}
						else {
							local rname    "bmass"
							local observed `observed_bmass'
							di "mass bootstrap"
						}

						use `bins', clear
						keep if bill_year == `y' & pp == `m'

						simulate mean = r(`rname') , reps(`r') seed(12345): myboot_`stat'

						bstat, stat(`observed') n(`n')

						local b 	: display %-4.3f _b[mean]
						local se 	: display %-4.3f _se[mean]
						matrix bs_`y'_`m' = e(b_bs)
						local bs 	= bs_`y'_`m'[1,1]
						matrix ci_n_`y'_`m' = e(ci_normal)
						local ci_n_lb = ci_n_`y'_`m'[1,1]
						local ci_n_ub = ci_n_`y'_`m'[2,1]
						matrix ci_b_`y'_`m' = e(ci_bc)
						local ci_b_lb = ci_b_`y'_`m'[1,1]
						local ci_b_ub = ci_b_`y'_`m'[2,1]

						estat bootstrap, all

						// NOTE(review): the bias-corrected bounds are written here; the
						// normal-approximation bounds above are extracted but not used.
						file write elast "`data'" _tab "`n'" _tab "`y'" _tab "`m'" _tab "`w'" _tab "`poly'" _tab "`stat'" _tab "`b'" _tab "`bs'" _tab ///
							"`se'" _tab "`ci_b_lb'" _tab "`ci_b_ub'" _n
					}

				}
			}

			restore
		} // close polynomial loop

	} // close bin loop

	file close elast
		
	
** (6) Read in results and run t-tests

	// Load the bootstrap results and write a LaTeX table comparing bill years
	// 2013 and 2015 for each window / polynomial combination.
	import delimited "$IN/Data/admin_data/elast.txt", varnames(1) clear

	* test stat: (b2 - b1) / sqrt(se2^2 + se1^2) // test for equality of means
	// (the original form carried var = se^2 * n and divided by n again; n cancels)

	gen t_elast = .
	gen t_mass = .

	cap file close bunching
	file open bunching using ///
		"$resdir/MP_bunching.tex", ///
		write text replace
	file write bunching "\begin{tabular}{cccccccc}"	_n
	file write bunching "\hline" _n
	file write bunching "& &  \multicolumn{3}{c}{Excess mass} & \multicolumn{3}{c}{Elasticity} \\" _n
	file write bunching " Window & Polynomial & Postpaid & Prepaid & T-stat & Postpaid & Prepaid & T-stat \\ " _n
	file write bunching " & & (1) & (2) & (3) & (4) & (5) & (6) \\ \hline" _n

	foreach w in 2 3 4 5 6 {

		foreach poly in 5 7 9 {

			foreach stat in elast mass {

				// Group 1: year 2013. Keep a rounded copy for the table and an
				// unrounded copy for the test statistic.
				sum bootstrap if stat == "`stat'" & year == 2013 & poly == `poly' & window == `w'
				local b1_`stat'  : display %-4.3f r(mean)
				local b1_raw = r(mean)
				sum se if stat == "`stat'" & year == 2013 & poly == `poly' & window == `w'
				local se1_`stat'  : display %-4.3f r(mean)
				local se1_raw = r(mean)

				// Group 2: year 2015.
				sum bootstrap if stat == "`stat'" & year == 2015 & poly == `poly' & window == `w'
				local b2_`stat'  : display %-4.3f r(mean)
				local b2_raw = r(mean)
				sum se if stat == "`stat'" & year == 2015 & poly == `poly' & window == `w'
				local se2_`stat'  : display %-4.3f r(mean)
				local se2_raw = r(mean)

				replace t_`stat' = ((`b2_raw') - (`b1_raw')) / sqrt((`se2_raw')^2 + (`se1_raw')^2) if poly == `poly' & window == `w'

				// Read the statistic back for this cell only; summarizing the whole
				// column would average in the t-stats of earlier cells.
				sum t_`stat' if poly == `poly' & window == `w'
				local t_`stat' : display %-4.3f r(mean)

			}

			local window = `w'*10

			file write bunching " `window' & `poly' & `b1_mass' & `b2_mass' & `t_mass' & `b1_elast' & `b2_elast' & `t_elast' \\ " _n
			file write bunching "  &  & (`se1_mass') & (`se2_mass') &  & (`se1_elast') & (`se2_elast') &  \\ " _n

		}
	}


	file write bunching "\hline" _n
	file write bunching "\end{tabular}"
	file close bunching

// Stop the do-file here: the checks below are meant to be run by hand.
exit

*** Additional checks ***

* 1) Do we see bunching for domestic pp customers at 350 kwh
* 2) Do we see bunching for customers who received the wrong tariff after the switch?

use "$IN/Data/admin_data/workingdata.dta", clear

	cap drop if switchmonthD == 1
	cap drop if switcher == 0
	cap drop group
	keep if tariff == 2 // Domestic tariff
	keep if bill_year >2012

	cap drop t2 t3 // construct "placebo tariff"
	gen t2 = 0.079 if bill_year == 2013
	replace t2 = 0.084 if bill_year == 2014
	replace t2 = 0.091 if bill_year == 2015
	gen t3 = 0.184 if bill_year == 2013
	replace t3 = 0.204 if bill_year == 2014
	replace t3 = 0.251 if bill_year == 2015

	local bwidth = 10  // change bin width here

	// Both checks use the same recipe (window of 4 bins, 7th-order polynomial,
	// bill year 2015, pp == 1) and differ only in which tariff_error group is
	// dropped: check (1) "dom" drops tariff_error == 1, check (2) "error" drops
	// tariff_error == 0.
	foreach check in dom error {

		if "`check'" == "dom" {
			local dropval 1
			local banner "Domestic customer elasticity and bunch mass:"
		}
		else {
			local dropval 0
			local banner "Tariff error elasticity and bunch mass:"
		}

		preserve

		drop if tariff_error == `dropval'

		// Copy the 99th percentile into a local right away: r() results do not
		// reliably survive the drop commands that follow.
		sum mcons_no0, d
		local p99 = r(p99)
		drop if mcons_no0 > `p99' // trim very high outliers
		drop if mcons_no0 < 0 // negative consumption
		global maxcons = `p99'

		** create bins
		egen bin = cut(mcons_no0), at(0(`bwidth')$maxcons)
		label var bin "consumption bin with width of `bwidth'"
		sum bin
		global num_bin = `r(max)'

		// _N within a by-group is never missing, so no zero-filling is needed.
		bys bin bill_year pp: gen freq = _N
		label var freq "count of obs in a consumption bin by meter type and year"

		** Select bunching window width
		egen btag = tag(bin pp bill_year)
		gen delta = 4*`bwidth'
		gen zstar = 350

		** Construct bunching window // focus on zstar = 350
		// The window covers bins from zstar up to zstar + delta (right of the kink only).
		gen window = ((bin >= (zstar)) & (bin <= (zstar + delta))) // (bin >= (zstar - delta))
		label var window "within delta of kink point (symmetrical)" // trying all to the right

		** set up counterfactual distribution
		forval i = 1/7 {
			gen bin_`i' = bin ^ `i'
		}

		keep if btag == 1 // each bin contributes to the analysis not each observation

		keep if bill_year == 2015 & pp == 1
		sum bin freq

		reg freq bin_* if window == 0, cl(id)
		predict cfact
		label var cfact "counterfactual predicted distribution"

		egen totalcfact = total(cfact), mi
		egen totalactual = total(freq), mi

		// Multiplying by factors above 1 can never lift a non-positive total
		// (or reach a missing target): stop instead of looping forever.
		if (totalactual[1] > totalcfact[1]) & (totalcfact[1] <= 0 | missing(totalactual[1])) {
			di as error "integration constraint cannot be met for check `check'"
			exit 498
		}

		// NOTE(review): the factor grows every pass (1.01, 1.02, ...) and is
		// applied to the already-rescaled cfact, so the scaling compounds.
		local i = 0.01
		local j = 1
		while (totalactual > totalcfact) {
			local j = `j' + `i'
			di "`j'"
			replace cfact = cfact*`j'
			drop totalcfact
			egen totalcfact = total(cfact), mi
		}

		** Get predicted mass over window
		gen pred1 = .
		replace pred1 = cfact if window == 1
		egen pred_mass = total(pred1), mi
		label var pred_mass "counterfactual mass"

		** Then get actual mass over window
		gen prag1 = .
		replace prag1 = freq if window == 1
		egen act_mass = total(prag1), mi
		label var act_mass "actual mass"

		** Now compare actual mass to predicted mass
		gen  bunch_mass_`check' = (act_mass-pred_mass)/pred_mass
		label var bunch_mass_`check' "bunching mass"

		** Elasticity calculation
		gen elasticity_`check' = bunch_mass_`check'/((t2-t3)/t2)
		di "`banner'"
		sum elasticity_`check' bunch_mass_`check'

		restore
	}
