% Guvenen.m
%
%   Read the data from Guvenen, Karahan and Greg Kaplan.
%
%   Estimate sdH (the standard deviation of High type growth) following Jihee's
%   idea: treat the 97.5th percentile as the mean of the top 5 percent of growth
%   rates and the 99th percentile as the 80th percentile of that group, then use
%    x(p) = mu + sigma*norminv(p)
%   which gives sdH = (p99 - p97_5) / norminv(0.8).

% Start from an empty workspace; the script ends with `save Guvenen`, which
% stores whatever is in the workspace at that point.
clear;
% Project helper (not defined in this file); presumably opens the diary log
% that `diary off` closes at the end of the script -- TODO confirm.
diarychad('Guvenen');
% Every figure below is appended to Guvenen.ps, so remove any copy left over
% from an earlier run.  delete() issues a warning when the file does not
% exist, which would land in the log on a first run; only call it when there
% is something to delete.
if exist('Guvenen.ps','file')
    delete('Guvenen.ps');
end


% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%    muH and sigmaH
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Earnings-growth moments by year and earner percentile (repct).
EG=readtable('GuvenenKarahan_cross_distribution.txt');
summary(EG)

% Sample of the table layout:
% >> EG(96:100,:)
%
%     year    repct      n        mean         sd         skew       kurt       p1        p2_5         p5         p10        p12_5        p25         p37_5         p50        p62_5        p75        p87_5       p90        p95       p97_5      p99  
%     ____    _____    _____    _________    _______    ________    ______    _______    _______    ________    ________    ________    ________    _________    _________    ________    ________    _______    _______    _______    _______    ______
%
%     1981     96      24651    -0.026679    0.46685    -0.53122    17.551    -1.5581    -1.1167    -0.78525     -0.4415    -0.31748    -0.10042     -0.03201    0.0086316    0.041364    0.086294     0.2215    0.29947    0.63602    0.90164    1.2897
%     1981     97      24612    -0.025299    0.47245     -0.4165    16.744    -1.5384    -1.1171    -0.78484    -0.46157    -0.34865    -0.10861    -0.035646    0.0072783     0.04007    0.088142     0.2411    0.32371    0.67492     0.9369    1.3284
%     1981     98      24584    -0.029575    0.49248    -0.77648    16.357    -1.6108    -1.1586    -0.83997    -0.49952    -0.36951    -0.11805    -0.039131    0.0059914    0.040923    0.093519    0.26633    0.35507    0.69926    0.98828    1.3489
%     1981     99      24576    -0.031037    0.51516    -0.54132    16.672    -1.6582    -1.1869    -0.88853    -0.52529    -0.39561    -0.13137    -0.046525    0.0037513    0.043978     0.10393    0.29021    0.37652    0.70254    0.98986    1.4265
%     1981    100      24363    -0.028727    0.59563    -0.47288    15.861    -1.9002    -1.3379     -1.0213    -0.61578    -0.48587    -0.16153    -0.056377    0.0034073    0.056675     0.14168    0.35922    0.46411     0.8224     1.1827    1.7349 

% Contiguous column vector of calendar years spanned by the data.
LastYear=max(EG.year);
years=transpose(min(EG.year):LastYear);
Nyears=numel(years);

% TOP 1
% Keep only the rows for the highest earner percentile (repct==100).
disp(' ');
disp('Earnings growth moments for the Top 1 percent of earners');
Top1=find(EG.repct==100);
GrowthTop1Earners=EG(Top1,:)
% sdH from x(p) = mu + sigma*norminv(p), with mu taken to be p97_5 and p99
% taken to be the 80th percentile of the top-5-percent growth rates.
GrowthTop1Earners.sdH=(GrowthTop1Earners{:,'p99'}-GrowthTop1Earners{:,'p97_5'})./norminv(0.8);

% TOP 2
% Average the moments of the top earner percentiles (repct>98) year by year.
% Not ideal, but seems okay given that we have to work with the moments only.
% Note that every column is averaged, including repct and n.
Top2=find(EG.repct>98);
Top2data=table2array(EG(Top2,:));
data=NaN(Nyears,size(Top2data,2));  % one row per year, one column per EG variable
for t=1:Nyears;
    indx=find(Top2data(:,1)==years(t));  % column 1 holds the year
    % Average down the rows explicitly.  Without the dimension argument,
    % mean() treats a single matching row as a vector and returns one scalar,
    % which would then be broadcast across the whole row of `data`.
    data(t,:)=mean(Top2data(indx,:),1);
    data(t,1)=years(t);
end;
disp ' ';
disp 'Earnings growth moments for the Top 2 percent of earners';
GrowthTop2Earners=array2table(data,'VariableNames',EG.Properties.VariableNames)
% sdH from x(p) = mu + sigma*norminv(p) with mu = p97_5 and x(0.8) = p99.
GrowthTop2Earners.sdH=(GrowthTop2Earners.p99-GrowthTop2Earners.p97_5) / norminv(0.8);

% TOP 5
% Average the moments of the top earner percentiles (repct>95) year by year.
% Not ideal, but seems okay given that we have to work with the moments only.
% Note that every column is averaged, including repct and n.
Top5=find(EG.repct>95);
Top5data=table2array(EG(Top5,:));
data=NaN(Nyears,size(Top5data,2));  % one row per year, one column per EG variable
for t=1:Nyears;
    indx=find(Top5data(:,1)==years(t));  % column 1 holds the year
    % Average down the rows explicitly.  Without the dimension argument,
    % mean() treats a single matching row as a vector and returns one scalar,
    % which would then be broadcast across the whole row of `data`.
    data(t,:)=mean(Top5data(indx,:),1);
    data(t,1)=years(t);
end;
disp ' ';
disp 'Earnings growth moments for the Top 5 percent of earners';
GrowthTop5Earners=array2table(data,'VariableNames',EG.Properties.VariableNames)
% sdH from x(p) = mu + sigma*norminv(p) with mu = p97_5 and x(0.8) = p99.
GrowthTop5Earners.sdH=(GrowthTop5Earners.p99-GrowthTop5Earners.p97_5) / norminv(0.8);


% AVERAGE
% Average the moments over all earner percentiles (repct>0) year by year.
% This is an unweighted average across percentile rows; the `mean` column of
% the result is used below as the population-average growth rate.
TopAll=find(EG.repct>0);
TopAlldata=table2array(EG(TopAll,:));
data=NaN(Nyears,size(TopAlldata,2));  % one row per year, one column per EG variable
for t=1:Nyears;
    indx=find(TopAlldata(:,1)==years(t));  % column 1 holds the year
    % Average down the rows explicitly.  Without the dimension argument,
    % mean() treats a single matching row as a vector and returns one scalar,
    % which would then be broadcast across the whole row of `data`.
    data(t,:)=mean(TopAlldata(indx,:),1);
    data(t,1)=years(t);
end;
disp ' ';
disp 'Earnings growth moments for the AVERAGE of earners';
GrowthAverageEarners=array2table(data,'VariableNames',EG.Properties.VariableNames)


% NORMALIZE the growth rates: express the upper growth percentiles of each
% top-earner group relative to the all-percentile average growth rate.
% Only p99, p97_5 and p95 are adjusted; the other columns are left as read.
disp('Subtracting average change in log for entire population to normalize...');
normBaseline=GrowthAverageEarners.mean;
normCols={'p99','p97_5','p95'};
for iNormCol=1:numel(normCols)
    normCol=normCols{iNormCol};
    GrowthTop1Earners.(normCol)=GrowthTop1Earners.(normCol)-normBaseline;
    GrowthTop2Earners.(normCol)=GrowthTop2Earners.(normCol)-normBaseline;
    GrowthTop5Earners.(normCol)=GrowthTop5Earners.(normCol)-normBaseline;
end
% Drop the loop temporaries so the workspace saved at the end is unchanged.
clear normBaseline normCols iNormCol normCol


% GRAPHS mu
% One figure per earner group (Top 1%, 2%, 5%): normalized p99 (blue) and p95
% (green) growth, raw series as thin lines and HP-filtered series as thick
% lines.  Each figure is written as its own EPS before the title is added,
% then appended, with the title, to Guvenen.ps.
definecolors;
HPsmoother=100

muGroupTables={GrowthTop1Earners,GrowthTop2Earners,GrowthTop5Earners};
muGroupEps={'GuvenenMuTop1Earners','GuvenenMuTop2Earners','GuvenenMuTop5Earners'};
muGroupTitles={'Top 1% earners','Top 2% earners','Top 5% earners'};
for iMuGroup=1:numel(muGroupTables)
    muGroupTbl=muGroupTables{iMuGroup};

    figure(1); figsetup; makefigwide;
    plot(years,muGroupTbl.p99,'Color',myblue,'LineWidth',1);
    plot(years,muGroupTbl.p95,'Color',mygreen,'LineWidth',1);
    plot(years,hpfilter(muGroupTbl.p99,HPsmoother),'Color',myblue,'LineWidth',LW);
    plot(years,hpfilter(muGroupTbl.p95,HPsmoother),'Color',mygreen,'LineWidth',LW);
    chadfig2('Year','Change in log earnings',1,0);

    % Put each label just to the right of, and slightly above, the series peak.
    [g,i99]=max(muGroupTbl.p99);
    text(years(i99)+1,g*1.02,'99th percentile');
    [g,i95]=max(muGroupTbl.p95);
    text(years(i95)+1,g*1.02,'95th percentile');

    print('-depsc',muGroupEps{iMuGroup});
    title(muGroupTitles{iMuGroup});
    print('-dpsc','-append','Guvenen');
end
% Drop the loop temporaries so the workspace saved at the end is unchanged.
clear muGroupTables muGroupEps muGroupTitles iMuGroup muGroupTbl



% GRAPHS sd
% Standard deviation of earnings growth (the `sd` column) for the Top 1%
% (blue) and Top 5% (green) groups: raw series thin, HP-filtered series thick.
definecolors;
HPsmoother=100

sdSeriesTop1=GrowthTop1Earners.sd;
sdSeriesTop5=GrowthTop5Earners.sd;

figure(1); figsetup; makefigwide;
plot(years,sdSeriesTop1,'Color',myblue,'LineWidth',1);
plot(years,hpfilter(sdSeriesTop1,HPsmoother),'Color',myblue,'LineWidth',LW);
plot(years,sdSeriesTop5,'Color',mygreen,'LineWidth',1);
plot(years,hpfilter(sdSeriesTop5,HPsmoother),'Color',mygreen,'LineWidth',LW);
chadfig2('Year','Standard Deviation',1,0);

% Label each series just to the right of its maximum.
[g,i1]=max(sdSeriesTop1);
text(years(i1)+1,g,'Top 1%');
[g,i5]=max(sdSeriesTop5);
text(years(i5)+1,g,'Top 5%');

% The EPS is written without a title; the page appended to Guvenen.ps has one.
print('-depsc','GuvenenSigma');
title('StdDev of change in log earnings');
print('-dpsc','-append','Guvenen');
% Drop the temporaries so the workspace saved at the end is unchanged.
clear sdSeriesTop1 sdSeriesTop5

% GRAPHS sdH
% The percentile-based sdH estimate for the Top 1% (blue) and Top 5% (green)
% groups: raw series thin, HP-filtered series thick.
definecolors;
HPsmoother=100

sdHSeriesTop1=GrowthTop1Earners.sdH;
sdHSeriesTop5=GrowthTop5Earners.sdH;

figure(1); figsetup; makefigwide;
plot(years,sdHSeriesTop1,'Color',myblue,'LineWidth',1);
plot(years,hpfilter(sdHSeriesTop1,HPsmoother),'Color',myblue,'LineWidth',LW);
plot(years,sdHSeriesTop5,'Color',mygreen,'LineWidth',1);
plot(years,hpfilter(sdHSeriesTop5,HPsmoother),'Color',mygreen,'LineWidth',LW);
chadfig2('Year','Standard Deviation',1,0);

% The Top 1% label sits at that series' maximum, the Top 5% label at that
% series' minimum.
[g,i1]=max(sdHSeriesTop1);
text(years(i1)+1,g,'Top 1%');
[g,i5]=min(sdHSeriesTop5);
text(years(i5)+1,g,'Top 5%');

% The EPS is written without a title; the page appended to Guvenen.ps has one.
print('-depsc','GuvenenSigmaH');
title('StdDev of change in log earnings (H)');
print('-dpsc','-append','Guvenen');
% Drop the temporaries so the workspace saved at the end is unchanged.
clear sdHSeriesTop1 sdHSeriesTop5


% AVERAGES
% Period averages of the normalized p99 and p95 growth percentiles for each
% earner group.  Each row of yrs is a [first last] calendar-year window.
yrs=[1981 1990;
     1991 2000;
     2001 2012];
disp ' ';
disp 'Averages:';
disp ' ( earner percentiles / growth percentile )';
tle='Year1 Year2 Top1/99 Top1/95 Top2/99 Top2/95 Top5/99 Top5/95';
fmt='%6.0f %6.0f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f';
% Convert calendar years into row positions within `years` (a contiguous
% min:max vector), rather than assuming the data start in 1981.  The helper
% `average` is defined outside this file; it is passed these ranges as its
% second argument, presumably as row windows -- TODO confirm.
yrsRows=yrs-years(1)+1;
meanTop1=average([GrowthTop1Earners.p99 GrowthTop1Earners.p95],yrsRows);
meanTop2=average([GrowthTop2Earners.p99 GrowthTop2Earners.p95],yrsRows);
meanTop5=average([GrowthTop5Earners.p99 GrowthTop5Earners.p95],yrsRows);
cshow(' ',[yrs meanTop1 meanTop2 meanTop5],fmt,tle);


% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%    delta^e: What fraction of time does income fall by
%      40% or more?   log(0.6)=-0.51
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% For each year and earner group, pass the lower growth percentiles
% (p5, p10, p12_5, p25) and the target log(0.6) to interplin4 to locate the
% percentile at which the change in log earnings equals log(0.6).
% These four percentile columns are used as read from the data; the
% normalization step earlier in the script only adjusts p99, p97_5 and p95.

Delta_e=zeros(Nyears,3); % columns: Top1, Top2, Top5
prc=[.05; .10; .125; .25];
vars={'p5','p10','p12_5','p25'};
deltaGroups={GrowthTop1Earners,GrowthTop2Earners,GrowthTop5Earners};
for t=1:Nyears
    for iDeltaGroup=1:3
        deltaTbl=deltaGroups{iDeltaGroup};
        Delta_e(t,iDeltaGroup)=interplin4(prc,deltaTbl{t,vars},log(0.6));
    end
end
% Drop the loop temporaries so the workspace saved at the end is unchanged.
clear deltaGroups iDeltaGroup deltaTbl

disp(' ');
disp('Estimates of delta^e');
disp('Interpolated percentiles such that change in log earnings equals log(0.6)=-0.51');
disp(' (for Top 1%, 2%, and 5% of earners)');
cshow(' ',[years Delta_e],'%6.0f %12.4f','Year Top1 Top2 Top5');

% Plot the delta^e estimates for the Top 1% (blue) and Top 5% (green) groups:
% raw yearly series as thin lines, HP-filtered series as thick lines.
figure(1); figsetup; makefigwide;
plot(years,Delta_e(:,1),'Color',myblue,'LineWidth',1);
plot(years,Delta_e(:,3),'Color',mygreen,'LineWidth',1);
plot(years,hpfilter(Delta_e(:,1),HPsmoother),'Color',myblue,'LineWidth',LW);
plot(years,hpfilter(Delta_e(:,3),HPsmoother),'Color',mygreen,'LineWidth',LW);
% Delta_e holds interpolated percentiles (the fraction of earners whose log
% earnings change is at or below log(0.6)), not log changes, so the y-axis is
% a probability.  The previous label, 'Change in log earnings', was carried
% over from the growth-rate figures.
chadfig2('Year','Probability',1,0);
% Label Top 1% near its maximum and Top 5% near its minimum.
[g,i99]=max(Delta_e(:,1)); text(years(i99)+1,g*0.98,'Top 1%');
[g,i95]=min(Delta_e(:,3)); text(years(i95)+1.3,g*1.01,'Top 5%');
% The EPS is written without a title; the page appended to Guvenen.ps has one.
print('-depsc','GuvenenDelta');
title('Estimates of \delta^e for Top 1% and 5%');
print('-dpsc','-append','Guvenen');


% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%    pbar
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

P=readtable('GuvenenKarahan_impulse.txt');
summary(P)

% The impulse table, for computing our pbar
%   year    repct    rank1yr    prob_success
%    ____    _____    _______    ____________
%
%    1981    1        1          0.69388
%    1981    1        2          0.47475
%    1981    1        3          0.37037
%    1981    1        4          0.33028
%    1981    1        5          (value garbled in the original comment)

LastYear=max(P.year);
years=(min(P.year):max(P.year))';
Nyears=length(years);
ProbData=zeros(Nyears,100,100);  % Nyears x IncPrctile x GrowthPrctile
Nrecs=length(P.year); % 320000
% Scatter every record into the 3-D array in one indexed assignment instead
% of a record-by-record loop.  The year index is taken relative to the first
% year in the data rather than a hard-coded 1980.  sub2ind also raises an
% error if any subscript falls outside the Nyears x 100 x 100 array, where
% the loop would silently have grown the array.
yearRow=P.year-years(1)+1;
ProbData(sub2ind(size(ProbData),yearRow,P.repct,P.rank1yr))=P.prob_success;
ProbData(LastYear-years(1)+1,:,:)=NaN; % Was 0 in original source, but missing.

% ProbData(LastYear-1980,:,:)=[];  % Actually, let's just remove it
% years(end)=[];
% Nyears=length(years);

% ProbStayHighGrowth has one row per year and four columns, each a different
% slice of ProbData (Nyears x repct x rank1yr):
%
%   col 1: Top 1% earners (repct=100),    rank1yr=100
%   col 2: Top 1% earners (repct=100),    mean over rank1yr=96..100
%   col 3: Top 5% earners (repct=96..100), mean over repct at rank1yr=100
%   col 4: Top 5% earners (repct=96..100), mean over repct and rank1yr=96..100

pbarTop5=96:100;
ProbStayHighGrowth=[ ProbData(:,100,100), ...
                     mean(ProbData(:,100,pbarTop5),3), ...
                     mean(ProbData(:,pbarTop5,100),2), ...
                     mean(mean(ProbData(:,pbarTop5,pbarTop5),2),3) ];
% Drop the temporary so the workspace saved at the end is unchanged.
clear pbarTop5

% Figure 1 of 2: all four ProbStayHighGrowth columns against year, appended to
% Guvenen.ps only (no standalone EPS, no title).
definecolors;
figure(1); figsetup;
plot(years,ProbStayHighGrowth,'LineWidth',LW);
chadfig2('Year','ProbStayHighGrowth',1,0);
makefigwide
print('-dpsc','-append','Guvenen');

% Figure 2 of 2: column 2 only (repct=100, averaged over rank1yr=96..100),
% raw series thin and HP-filtered series thick.
figure(1); figsetup;
plot(years,ProbStayHighGrowth(:,2),'LineWidth',1);
% The last year of ProbData was set to NaN above, so the filtered series is
% plotted against years(1:end-1).  packr is a project helper not defined in
% this file; presumably it drops the NaN rows -- TODO confirm.
plot(years(1:end-1),hpfilter(packr(ProbStayHighGrowth(:,2)),HPsmoother),'LineWidth',LW);
chadfig2('Year','Probability',1,0);
makefigwide
% The EPS is written without a title; the page appended to Guvenen.ps has one.
print('-depsc','GuvenenPBar');
title('prob of high growth(t,t+1) to high growth(t+1,t+2) for top 1% earners');
print('-dpsc','-append','Guvenen');

% Year-by-year table of the four ProbStayHighGrowth measures.
disp ' ';
disp 'Probability a fast-growing top earner remains fast growing:';
disp '( earner prctile / initial growth prctile )';
tle='Year Top1/Top1 Top1/Top5 Top5/Top1 Top5/Top5';
fmt='%6.0f %12.4f';
cshow(' ',[years ProbStayHighGrowth],fmt,tle);

% Period averages.  Each row of yrs is a [first last] calendar-year window.
% The final window stops at 2011; the last year of ProbData was set to NaN
% above, so (assuming that last year is 2012 -- TODO confirm) including it
% would make the average NaN.
yrs=[1981 1990;
     1991 2000;
     2001 2011];
disp ' ';
disp 'Averages:';
tle='Year1 Year2 Top1/Top1 Top1/Top5 Top5/Top1 Top5/Top5';
fmt='%6.0f %6.0f %12.4f %12.4f %12.4f %12.4f';
% Convert calendar years into row positions within `years` (a contiguous
% min:max vector), rather than assuming the data start in 1981.  The helper
% `average` is defined outside this file; it is passed these ranges as its
% second argument, presumably as row windows -- TODO confirm.
cshow(' ',[yrs average(ProbStayHighGrowth,yrs-years(1)+1)],fmt,tle);

% Persist the whole workspace to Guvenen.mat, then stop logging.
save('Guvenen');

diary('off');