
/* updated 2-18-13 to compute summary stats for 18-75 instead of 18-80 year olds */


/* program for loading and appending the data */
capture program drop loadAppend
/*
 * loadAppend: load one dataset, or load ten decile files and stack them.
 *
 *   data(string)  path stub of the dataset. With append(0) it is opened as-is;
 *                 with append(1) the files <data>_0 ... <data>_9 are opened.
 *   append(int)   0 = simple load, 1 = load every decile file and append them.
 *   if(str)       expression placed after -if- on every -use-, so only the
 *                 matching observations are read from disk.
 *
 * On return the selected observations are in memory (the previous contents
 * are cleared). Any other value of append() stops with return code 198.
 * With append(1), scratch files temp_0 ... temp_8 are written to and then
 * erased from the directory named by the global datadir.
 */
program define loadAppend
    syntax, data(string) append(int) if(str)
    
    quietly{
        /* append must be 0 or 1: stop here rather than fall through and */
        /* hand the caller whatever happened to be in memory             */
        if `append' != 0 & `append' != 1 {
            noisily di as error "Error: append must be 0 (simple load) or 1 (load and append)"
            exit 198
        }
        
        /* just load the data if no append */
        if `append'==0 {
            use "`data'" if `if', clear
        }
        
        /* loop from 0 to 9 if append */
        if `append' == 1{
        
            /* filter each decile; deciles 0-8 go to scratch files, */
            /* decile 9 is left in memory as the base for appending  */
            forvalues d=0/9{
                use "`data'_`d'" if `if', clear
                if `d'<9 save "$datadir/temp_`d'", replace
            }
            
            /* append the scratch files onto decile 9, deleting each once used */
            forvalues d=0/8{
                append using "$datadir/temp_`d'"
                erase "$datadir/temp_`d'.dta"
            }
        }
    }
end

/********************************************************************************************/
/* compute summary statistics                                                               */
/********************************************************************************************/
    local time1 "$S_TIME"
    
/************************************************************/
/* 1. make a temporary data set                             */
/* 2. For each sample                                       */
/*  a. compute mean/var for each var                        */
/*  b. compute median earnings COP                          */
/*  c. save results into a temporary dataset                */
/* 3. Compute grand mean/sd, using the temporary dataset    */
/* 4. Print out results                                     */
/************************************************************/

local append 1

/* Variables that get a mean/variance for the full sample and for ages 62-69. */
/* Kept in one local so the placeholder, save and table loops cannot drift.   */
local statVars sex lfp nilf nilfLead real_earnings lifetimeEarning nearKink realWages real_se_earnings hasSEOld hasSE1 hasSE2

/* 1. make empty data set*/
foreach sample in s1Main s1SE{
    /* other start years handled by the y2 lookup below: 1955, 1978 */
    qui foreach y1 of numlist 1990{
        clear
        set obs 100
        local row = 0
    
        /* last calendar year of the window that starts in y1 */
        if `y1' == 1955 local y2 2005
        if `y1' == 1978 local y2 2005
        if `y1' == 1990 local y2 1999
        
    
        /* make summary stat placeholders: M = mean, V = variance, */
        /* the 6269 infix marks the ages 62-69 subsample            */
        foreach var of local statVars {
            gen `var'M = .
            gen `var'6269M = .
    
            gen `var'V = .
            gen `var'6269V = .
        }
        
        /* place holders for the earnings percentiles */
        foreach pp in 10 25 50 75 90{
            gen real_earningsp`pp' = .
            gen real_earnings6269p`pp' = .
        }
    
        /* single-year-of-age participation placeholders */
        forvalues a = 62/69{
            gen lfp`a'M = .
            gen lfp`a'V = .
        }
    
        /* make count var placeholders */
        foreach var in nPeople nPeople6269 nObs nObs6269 decile{
            gen `var' = .
        }
        forvalues a = 62/69{
            gen nPeople`a' = .
        }
    
        /* one row of the results table per decile file */
        forvalues d = 0/9{
    
            local ++row
            replace decile = `d' in `row'
            /* park the table on disk: the decile data replaces it in memory */
            save "$datadir/summaryTable", replace
            
            /* 2a. load each decile, make summary stats */
            
            # delimit ;
            use "$datadir/ssa_`sample'_`d'" if 
                !missing(earnings) & 
                earnings != 0 & 
                year>=`y1' & year<=`y2' & 
                age>=18 & age<=75, 
                clear
            ;
            # delimit cr
            
            /* recode the string sex codes to numbers; -force- turns any other code into missing */
            replace sex = "1" if sex == "M"
            replace sex = "2" if sex == "F"
            destring sex, replace force
            
            /* make SE earnings indicator, wage earnings */
            gen realWages = real_earnings - real_se_earnings if !missing(real_se_earnings)
            replace realWages = real_earnings if missing(real_se_earnings)
            gen hasSEOld = real_se_earnings>0 & !missing(real_se_earnings) if !missing(real_earnings)
            gen hasSE1 = real_se_earnings>0 & !missing(real_se_earnings) 
            gen hasSE2 = real_se_earnings>0 & !missing(real_se_earnings) & !missing(real_earnings)
                            
            /* find percent dead/percent not working | not self-employed*/
            capture gen yob = year-age
            sort id
            by id: egen nSE = total(hasSE1) 
            by id: egen nSE6269 = total(hasSE1 & age>=62 & age<=69)
            /* NOTE(review): 63 equals 80-18+1, which looks like a leftover from the */
            /* 18-80 window; confirm it is still intended now that ages stop at 75.  */
            gen potentialRecords = 63 if yob<=1925
            replace potentialRecords = 1+2005-yob-18-nSE if  yob>1925
    
            gen pr6269 = 8-nSE6269 if yob<=1936
            replace pr6269 = 1944-yob-nSE6269  if yob>1936 & yob<=1943
            capture drop count
            gen count = 1
            sort id
            by id: gen realRecords = _N
            by id: egen rr6269 = total(age>=62 & age<=69 & earnings>0 & (startYear<=yob+65) )
                
            /* participation = records observed / records possible */
            gen lfp = realRecords/potentialRecords
            gen lfp6269 = rr6269/pr6269
            /* flag one record per person so person-level stats are not record-weighted */
            egen tagPerson = tag(id)
            egen tagOlder = tag(id) if yob<=(2005-62)  & (startYear<=yob+65) 
    
            /* find LFP for 62/69 year olds */                    
            forvalues a=62/69{
                egen tag`a' = tag(id) if yob<=2005-`a'  & (startYear<=yob+65) 
                egen lfp`a' = sum(age==`a' & earnings>0 & (startYear<=yob+65) ), by(id)
            }
    
            gen nearKink = abs(real_distance)<=1000
    
            /* person-level statistics */
            sum sex if tagPerson & (sex==1 | sex == 2)
                local sexM = r(mean)
                local sexV = r(Var)
            sum sex if tagOlder & (sex==1 | sex == 2)
                local sex6269M = r(mean)
                local sex6269V = r(Var)
                
            sum lfp if tagPerson
                local lfpM = r(mean)
                local lfpV = r(Var)
            sum lfp6269 if tagOlder
                local lfp6269M = r(mean)
                local lfp6269V = r(Var)
                
            forvalues a = 62/69{
                sum lfp`a' if tag`a'
                local lfp`a'M = r(mean)
                local lfp`a'V = r(Var)
            }
            
            sum  lifetimeEarning  if tagPerson
                local lifetimeEarningM = r(mean)
                local lifetimeEarningV = r(Var)
            sum lifetimeEarning if tagOlder  & age>=62 & age<=69 & (startYear<=yob+65) 
                local lifetimeEarning6269M = r(mean)
                local lifetimeEarning6269V = r(Var)
            
            /* record-level statistics; the older subsample is filtered on age only */
            foreach v in nilf nilfLead {
                sum `v'
                local `v'M = r(mean)
                local `v'V = r(Var)
                sum `v' if age>=62 & age<=69
                local `v'6269M = r(mean)
                local `v'6269V = r(Var)
            }
                
            /* earnings: moments plus percentiles, so -detail- is needed */
            sum real_earnings, det
                local real_earningsM= r(mean)
                local real_earningsV = r(Var)
                foreach pp of numlist 10 25 50 75 90{
                    local real_earningsp`pp' = `r(p`pp')'
                }                    
                
            sum real_earnings if age>=62 & age<=69, det
                local real_earnings6269M = r(mean)
                local real_earnings6269V = r(Var)
                foreach pp of numlist 10 25 50 75 90{
                    local real_earnings6269p`pp' = `r(p`pp')'
                }                    
                
            /* record-level statistics; the older subsample is filtered on age and startYear */
            foreach v in nearKink realWages real_se_earnings hasSEOld hasSE1 hasSE2 {
                sum `v'
                local `v'M = r(mean)
                local `v'V = r(Var)
                sum `v' if age>=62 & age<=69 & (startYear<=yob+65)
                local `v'6269M = r(mean)
                local `v'6269V = r(Var)
            }
                
            /* record and person counts, later used as weights and totals */
            count
                local nObs = r(N)
            count if age>=62 & age<=69 & (startYear<=yob+65) 
                local nObs6269 = r(N)
            count if tagPerson
                local nPeople = r(N)
            count if tagOlder
                local nPeople6269 = r(N)
                
            forvalues a=62/69{
                count if tag`a'
                local nPeople`a' = r(N)
            }  
            
            /* progress / sanity output for this decile */
            noisily di "`d':"
            noisily di "  lfp: `lfp6269M', `lfp62M', `lfp69M'"
            qui count if real_se_earnings > real_earnings  & !missing(real_se_earnings)
            local N = `r(N)'
            noisily di "  `N' with real_se_earnings > real_earnings"
            /* bring the results table back; the decile data is no longer needed */
            use "$datadir/summaryTable", clear
    
            /* 2b. save summary stats */        
            foreach var of local statVars {
                replace `var'M = ``var'M' in `row'
                replace `var'6269M = ``var'6269M' in `row'
    
                replace `var'V = ``var'V' in `row'
                replace `var'6269V = ``var'6269V' in `row'
            }
            
            foreach pp of numlist 10 25 50 75 90{
                replace real_earningsp`pp' = `real_earningsp`pp'' in `row'
                replace real_earnings6269p`pp' = `real_earnings6269p`pp'' in `row'
            }                                    
            
            forvalues a = 62/69{
                replace lfp`a'M = `lfp`a'M' in `row'
                replace lfp`a'V = `lfp`a'V' in `row'
                replace nPeople`a' = `nPeople`a'' in `row'
            }  
            
            replace nPeople = `nPeople' in `row'
            replace nPeople6269 = `nPeople6269' in `row'
    
            replace nObs = `nObs' in `row'
            replace nObs6269 = `nObs6269' in `row'
            
        }
    
        /* 3. fill in table: t1c0 = label, t1c1/t1c2 = full sample mean/(SD), */
        /*    t1c3/t1c4 = ages 62-69 mean/(SD)                                */
        gen t1c0 = "Var:" in 1
        gen t1c1 = "18-75" in 1
        gen t1c2 = "(SD)" in 1
        gen t1c3 = "62-69" in 1
        gen t1c4 = "(SD)" in 1
        local row = 1
    
        qui foreach var of local statVars {
            local ++row
            replace t1c0 = "`var'" in `row'
            local c = 0
            foreach age in "" "6269"{
                /* grand mean = weighted mean of the decile means */
                sum `var'`age'M [w=nObs`age']
                local ++c
                replace t1c`c' = string(r(mean), "%9.2f") in `row'
                local xVar = r(Var)
                noisily di "`var' Cross-group var (`age'): `xVar'"
    
                /* total variance = mean within-decile variance + variance across decile means */
                sum `var'`age'V [w=nObs`age']
                local ++c
                replace t1c`c' = string( sqrt( r(mean)+`xVar'), "%9.2f") in `row'
            }
        }
        qui forvalues a=62/69{
            local ++row
            replace t1c0 = "lfp`a'" in `row'
            
            sum lfp`a'M [w=nPeople`a']
            replace t1c1 = string(r(mean), "%9.2f") in `row'
            local xVar = r(Var)
            sum lfp`a'V [w=nPeople`a']
            replace t1c2 = string( sqrt( r(mean)+`xVar'), "%9.2f") in `row'   
        }
        
        /* percentile rows: weighted average of the decile-level percentiles */
        foreach pp of numlist 10 25 50 75 90{
            local ++row
            replace t1c0 = "Earnings Percentile `pp'(COP)" in `row'
            sum real_earningsp`pp' [w=nObs]
            replace t1c1 = string(r(mean),"%9.2f") in `row'
            /* the 62-69 value belongs under the 62-69 header (t1c3), not under (SD) */
            sum real_earnings6269p`pp' [w=nObs6269]
            replace t1c3 = string(r(mean),"%9.2f") in `row'
        }
        
        /* leave one blank row before the totals */
        local ++row
        foreach var in nObs nPeople{
            local ++row
            replace t1c0 = "`var'" in `row'
    
            sum `var'
            replace t1c1 = string(r(sum), "%9.0f") in `row'
    
            sum `var'6269
            replace t1c3 = string(r(sum), "%9.0f") in `row'
        }
        /* 4. print the table, labelled with the window actually loaded */
        noisily di ""
        noisily di "Summary stats for `y1'-`y2', `sample':"
        noisily list t1c0-t1c4 in 1/`row', noobs clean noheader
    } /* end y1 loop */        
} /* end sample loop */
local time2 "$S_TIME"
noisily di "Start: `time1', stop: `time2'" 

/********************************************************/
/* Calculate median real_earnings, various samples      */
/********************************************************/
local time1 = "$S_TIME"
/* Median of real_earnings without ever holding all ten decile files at once. */
/* Each pass narrows a bracket [L, U] that contains the median:               */
/*   - take percentile p of every decile file inside the current bracket;     */
/*     the smallest and largest of those become the new L and U               */
/*   - count the records below L (NL) and above U (NU) over all files and     */
/*     re-express the median as percentile p of the records inside [L, U]     */
/* Passes stop once the bracket holds at most about a tenth of the records    */
/* (or after 10 passes); the bracket is then loaded and its p-th percentile   */
/* is reported as the median.                                                 */
/* The bracket is closed on both sides everywhere: NL and NU count strictly   */
/* outside it, so records equal to L or U must stay in the loaded data.       */
** initialize for sample
local sample s1Main
qui foreach y1 of numlist 1955 1978 1990{
    
    /* last calendar year of the window that starts in y1 */
    if `y1' == 1955 local y2 2005
    if `y1' == 1978 local y2 2005
    if `y1' == 1990 local y2 1999

    foreach a1 of numlist 18 62{

        /* upper age bound that goes with each lower bound */
        if `a1' == 18 local a2 75
        if `a1' == 62 local a2 69
        
        noisily di "Median real earnings, `y1'-`y2', `a1'-`a2':"
        
        ** Initialize counts, medians
        local NL = 0
        local NU = 0
        local N = .
        local L = 0
        local U = .
        local p = 50
        
        /* j counts passes; diff only needs to start above 1 to enter the loop */
        local j = 0
        local diff = 17
        
        while `diff' > 1 & `j'<10 {
        
            local ++j
            
            /* running min / max of the per-file percentiles */
            local subminz = .
            local submaxz = -1
            
            forval y = 0/9 {
                
                ** Load each subsample, restricted to the current bracket
                # delimit ;
                use real_earnings age year using "$datadir/ssa_`sample'_`y'" if 
                    !missing(real_earnings) &  real_earnings != 0 & year>=`y1' & year<=`y2' & 
                    age>=`a1' & age<=`a2' & real_earnings >= `L' & real_earnings <= `U', 
                    clear
                ;
                # delimit cr
                
                ** find this file's percentile and widen the min / max with it
                _pctile real_earnings if real_earnings >= `L' & real_earnings <= `U', p(`p')
                if r(r1) < `subminz' local subminz = r(r1) 
                if r(r1) > `submaxz' & r(r1) != . local submaxz = r(r1)
            
            }
            
            local L = `subminz'
            local U = `submaxz'
            
            local NL = 0
            local NU = 0
            local nObs = 0
            forval y = 0/9 {
            
                ** Load each subsample, count records outside the new bracket
                # delimit ;
                use real_earnings age year using "$datadir/ssa_`sample'_`y'" if 
                    !missing(real_earnings) &  real_earnings != 0 & year>=`y1' & year<=`y2' & 
                    age>=`a1' & age<=`a2', 
                    clear
                ;
                # delimit cr
                
                count if real_earnings < `L'
                local NL = `NL' + r(N)
                count if real_earnings > `U'
                local NU = `NU' + r(N)
                local nObs = `nObs'+_N
            }
            
            /* N = records inside [L, U]; p = where the overall median sits within them */
            local N = `nObs' - `NL' - `NU'
            local p = 100*(`nObs'/2-`NL')/`N'
            local diff = `N' - `nObs'/10
            noisily di "  done with iteration `j', p = `p'" 
        
        }
        
        ** load the data in this range
        # delimit ;
        loadAppend, data("$datadir/ssa_`sample'") append(`append')
          if("age>=`a1' & age<=`a2'
            &( (year>=`y1' & year<=(`y2')))
            & real_earnings >= `L' & real_earnings <= `U'"
          )
        ;
        # delimit cr
        
        
        quietly{
            _pctile real_earnings, p(`p')
            local med2 = r(r1)
            
            /* diagnostics: records at or below / at or above the reported median */
            count if real_earnings <= `med2'
            local nlFinal = `NL'  + `r(N)'
            count if real_earnings >= `med2'
            local nuFinal = `NU' + `r(N)'
            local diffFinal = `nuFinal' - `nlFinal'
            count if real_earnings == `med2'
            local medCount = `r(N)'
            
            local nFinal = _N
        }
        noisily di "  Median: `med2'"
        noisily di "  Bounds: `U'-`L'"
        noisily di "  NU: `NU', NL: `NL', N Final: `nFinal'"
        noisily di "  N Above- N below: `diffFinal'"
        noisily di "  N at median: `medCount'"
    }
}
local time2 = "$S_TIME"

noisily di "Start: `time1', stop: `time2'"

    

