
************************************
***********Data Cleaning************
************************************


*************Merge Host Country Level Controls******************
* Loads the raw Amadeus extract, derives the host-country code from the first
* two characters of bvd_id, attaches host-country-by-year controls, and saves
* the result as Exemption_full.

* The use command accepts clear (not replace); clear discards data in memory.
qui use "S:\Li Liu\Amadeus Downloads\ETPF\Financial\Disk Download\Amadeus_ETPF_full", clear
qui gen country=substr(bvd_id,1,2)

qui merge m:1 country year using "${cbt_datadir}\Control\controls"
* Discard country-years that exist only in the controls file; otherwise they
* are appended as rows with no firm data. This matches the handling of the
* second merge below.
qui drop if _merge==2
qui drop _merge

* NOTE(review): this using file is merged on uo_ctry and year further down in
* the script; confirm that it also carries a country key for this merge.
qui merge m:1 country year using "${cbt_datadir}\Control\uoctry_macroclean2", ///
keepusing (governance_estimate depth_fi efficiency_fi efficiency_fm stable_fi)
* Prefix with host_ so the same source variables can later be merged again
* for the ultimate-owner country without a name clash.
qui ren governance_estimate host_governance
qui ren depth_fi host_depth_fi
qui ren efficiency_fi host_efficiency_fi
qui ren efficiency_fm host_efficiency_fm
qui ren stable_fi host_stable_fi
qui drop if _merge==2
qui drop _merge

qui save "S:\Li Liu\Amadeus Downloads\ETPF\Financial\Disk Download\Exemption_full", replace

*********************Sample Selection*****************************
* Builds the working sample from Exemption_full and writes two side files:
* firms without a usable owner country, and firms whose owner is in the same
* country as the firm.

quietly use "S:\Li Liu\Amadeus Downloads\ETPF\Financial\Disk Download\Exemption_full", clear
describe
egen id=group(bvd_id)

* Inspect the raw owner-country codes, then blank out the "n.a." placeholder.
tabulate uo_ctry
quietly replace uo_ctry="" if uo_ctry=="n.a."

* keep equals 1 when both the firm country and the owner country are known.
generate keep=(country!="" & uo_ctry!="")
tabulate keep

* Side file: observations lacking a firm country or an owner country.
preserve
drop if keep!=0
quietly save "S:\Li Liu\Amadeus Downloads\ETPF\Financial\Disk Download\Amadeus_ETPF_noultimate owner", replace
restore

* Side file: observations whose owner country equals the firm country.
preserve
drop if country!=uo_ctry
quietly save "S:\Li Liu\Amadeus Downloads\ETPF\Financial\Disk Download\Amadeus_ETPF_domestic", replace
restore

* Main sample: known owner, and either a foreign owner or a firm located in GB
* (GB firms are retained even when the owner is also in GB).
keep if (country!=uo_ctry | country=="GB") & keep==1
drop keep


* Indicators splitting the sample into non-GB and GB firms.
generate insample_foreign=(country!="GB")
generate insample_uk=(country=="GB")

quietly save "${cbt_datadir}\Exemption_MNE_toclean", replace

***********Variables Clean**********************************
* Removes observations with negative values in balance-sheet and income items,
* recording the number removed per variable as a dataset note, then drops
* observations whose owner country is coded as a dash.

quietly use "${cbt_datadir}\Exemption_MNE_toclean", clear

summarize

* Variables that must not be negative.
local nonneg_vars asset_fixed asset_intang asset_tang asset_current asset_total debt_lt lib_current  ///
turnover uo_turnover uo_asset depr wage 

foreach v of varlist `nonneg_vars' {

	* r(N) from summarize is the number of negative observations about to be dropped.
	summarize `v' if `v'<0
	local n_neg=r(N)
	drop if `v'<0
	note:  `n_neg' observations deleted
	}

tabulate uo_ctry
drop if uo_ctry=="-"

***Industry Selection***
* Drops excluded industries based on the 4-digit code ind_nace, then builds a
* 9-category sector variable nace_1digit with value labels.

gen nace_2digit=int(ind_nace/100)
tab nace_2digit

drop if nace_2digit==64 | nace_2digit==65 | nace_2digit==66 /*exclude banks and insurance companies*/
drop if ind_nace>=8400 & ind_nace<=8430  /*exclude public administration and defence; compulsory social security*/
drop if ind_nace>=8500 & ind_nace<=8559  /*exclude education*/
drop if ind_nace>=8600 & ind_nace<=8899  /*exclude human health and social work*/
drop if ind_nace>=9600 & ind_nace<9900    /*exclude households and extraterritorial organizations and bodies*/
* Numeric missing compares as larger than any number, so this also drops
* observations with a missing ind_nace.
drop if ind_nace>=9900                    /*exclude missing industry code*/

*Industry sector*
* Start from the leading digit; codes not covered by any range below keep
* this initial value.
gen nace_1digit=int(ind_nace/1000)
*agriculture, forestry and fishing
replace nace_1digit=1 if ind_nace>=100 & ind_nace<=322
*mining and construction
replace nace_1digit=2 if (ind_nace>322 & ind_nace<=999) | (ind_nace>=4100 & ind_nace<=4399)
*manufacturing
replace nace_1digit=3 if ind_nace>=1000 & ind_nace<=3320 
*energy supply
replace nace_1digit=4 if ind_nace>=3500 & ind_nace<=3900 
*wholesale and retail trade
replace nace_1digit=5 if ind_nace>=4500 & ind_nace<=4799
*transportation and storage
replace nace_1digit=6 if ind_nace>=4900 & ind_nace<=5320
*accommodation and food services
replace nace_1digit=7 if ind_nace>=5500 & ind_nace<=5630
*information and communication
replace nace_1digit=8 if ind_nace>=5800 & ind_nace<=6399
*real estate and other services
replace nace_1digit=9 if ind_nace>=6800 

* Remove any earlier definition of the value label. The subcommand is
* label drop; a plain drop would try to delete variables instead. capture is
* kept because the label may not exist yet.
cap label drop nace_ind
* No capture here, so a malformed definition stops the script instead of
* leaving nace_1digit silently unlabelled.
label define nace_ind 1 "Agriculture & Forestry" 2 "Mining & Construction" ///
3 "Manufacturing" 4 "Energy Supply" 5 "Wholesale & Retail Trade" 6 "Transportation & Storage" ///
7 "Accomodation & Food" 8 "Information" 9 "Other Services" , replace              

label values nace_1digit nace_ind

*Missing Turnover and Asset Excluded*
drop if turnover==.
drop if asset_total==.

**************Variables of Interest***********************************

xtset id year

*investment rate*
* K is the capital stock: tangible plus intangible assets.
* All lags use the previous observation of the same firm. The by-prefix names
* year as the within-firm sort key so the lag does not depend on whatever
* order the data happen to be in.
* NOTE(review): [_n-1] is the previous available observation, not necessarily
* the previous calendar year; confirm that gaps in the panel are acceptable.
gen K=asset_tang+asset_intang
bysort id (year): gen I=K-K[_n-1]+depr
bysort id (year): gen I_net=K-K[_n-1]
bysort id (year): gen I_fixed=asset_tang-asset_tang[_n-1]+depr

bysort id (year): gen Irate=I/K[_n-1]
label var Irate "Gross Investment scaled by tangible and intangible asset"
bysort id (year): gen Irate2=I_fixed/asset_fixed[_n-1]
label var Irate2 "Gross Investment scaled by fixed asset"
bysort id (year): gen Irate_net=I_net/K[_n-1]
label var Irate_net "Net Investment scaled by tangible and intangible asset"

*winsorize investment rate*
* Creates two winsorized copies of each investment rate with the winsor
* command: suffix _w uses p(0.025) and suffix _w99 uses p(0.01).
* The two passes are kept separate so the new variables are created in the
* same order as before.

foreach rate in Irate Irate2 Irate_net {
	winsor `rate', gen(`rate'_w) p(0.025)
}

foreach rate in Irate Irate2 Irate_net {
	winsor `rate', gen(`rate'_w99) p(0.01)
}

*inflation-deflated real series*
* Each nominal series is divided by price_index and multiplied by 100.
* Turnover is additionally divided by 1000.
* gdp_capita and unemp are overwritten in place, so this section should be
* run only once on freshly loaded data.

generate turnover_real=((turnover/price_index)*100)/1000
generate K_real=(K/price_index)*100
replace gdp_capita=(gdp_capita/price_index)*100
replace unemp=unemp/100
generate ltdebt_real=(debt_lt/price_index)*100
generate int_real=(int_paid/price_index)*100
generate wage_real=(wage/price_index)*100

* Cash-flow rate, turnover growth, leverage, interest and wage ratios.
* Lagged values use the previous observation of the same firm; year is named
* as the within-firm sort key so the lags do not depend on the current data
* order (winsor is called between the lag computations).
xtset id year
bysort id (year): gen cf_rate=cashflow/K[_n-1]
winsor cf_rate, gen (cf_w) p(0.01)
bysort id (year): gen lag_cf=cf_w[_n-1]
bysort id (year): gen K_lag=K_real[_n-1]

bysort id (year): gen turnover_growth=turnover/turnover[_n-1]-1
winsor turnover_growth, gen (turn_g) p(0.025)
bysort id (year): gen turn_g_lag=turn_g[_n-1]

* Contemporaneous ratios: no by-group processing is needed.
gen ltdebt_rate=debt_lt/asset_total
gen int_rate=int_paid/asset_total

winsor ltdebt_rate, gen(ltdebt_w) p(0.01)
winsor int_rate, gen(int_w) p(0.01)

*wage rate*
* NOTE(review): turnover_real was divided by 1000 when it was created while
* wage_real was not; confirm that the two series are meant to be in different
* units before interpreting the level of this ratio.
bysort id (year): gen wage_rate=wage_real/turnover_real[_n-1]
label var wage_rate "Compensation scaled by lagged revenue"
sum wage_rate, d
winsor wage_rate, gen(wage_rate_w) p(0.01)


*other controls*
* Profit margin: EBITDA over turnover, with a copy winsorized at p(0.01).
generate profit_margin=(ebitda/turnover)
label variable profit_margin "EBITDA scaled by turnover"
winsor profit_margin, gen(pmargin_w) p(0.01)

*productivity*
* Turnover per unit of wkrs, with a copy winsorized at p(0.01).
generate productivity=(turnover/wkrs)
winsor productivity, gen(productivity_w) p(0.01)

* Firm age counted from the incorporation year, starting at 1.
generate age=(year-incorp_year)+1

*other controls on uo country level*
* Attaches the uo_ prefixed variables from the controls file, matched on the
* ultimate-owner country and year.
* NOTE(review): the same controls file is merged in full on country and year
* at the top of the script. If the uo_ variables were already brought into
* memory by that merge, this merge will not overwrite them, because the
* default merge keeps the master values; confirm the values are owner-country
* values.

merge m:1 uo_ctry year using "${cbt_datadir}\Control\controls", keepusing(uo_*)

* Discard owner-country-years present only in the controls file; otherwise
* they are appended as rows with no firm data.
drop if _merge==2
drop _merge

*region indicator
* Attaches the continent of the ultimate-owner country and builds one dummy
* per continent. Rows found only in the using file are not kept.
merge m:1 uo_ctry using "${cbt_datadir}\Other\uoctry_continent", keep(master match) nogenerate

rename continent uo_continent

tabulate uo_continent

generate uo_africa=(uo_continent=="africa")
generate uo_asia=(uo_continent=="asia")
generate uo_eu=(uo_continent=="europe")
generate uo_namerica=(uo_continent=="north america")
generate uo_oceania=(uo_continent=="oceania")
generate uo_samerica=(uo_continent=="south america")

*WDI controls*
* Attaches owner-country macro controls by owner country and year. Rows found
* only in the using files are not kept.
merge m:1 uo_ctry year using "${cbt_datadir}\Control\uoctry_macroclean", keep(master match) nogenerate
drop uo_export

* Governance and financial-development measures for the owner country,
* renamed with the uo_ prefix after the merge.
merge m:1 uo_ctry year using "${cbt_datadir}\Control\uoctry_macroclean2", ///
keepusing (governance_estimate depth_fi efficiency_fi efficiency_fm stable_fi) keep(master match) nogenerate
rename governance_estimate uo_governance
foreach fin in depth_fi efficiency_fi efficiency_fm stable_fi {
	rename `fin' uo_`fin'
}

*Exchange rate Euro-US Dollar*
* Attaches the yearly exchange rate, divides owner-country GDP per capita by
* it, and writes the final dataset to both output locations.
merge m:1 year using "${cbt_datadir}\Control\eu_xrate"
* Discard years present only in the exchange-rate file; otherwise they are
* appended as rows with no firm data.
drop if _merge==2
replace  uo_gdppcap=uo_gdppcap/xrate
drop _merge

qui save "S:\Li Liu\Amadeus Downloads\ETPF\Financial\Disk Download\Exemption_MNE", replace
qui save "${cbt_datadir}\Exemption_MNE", replace



