function []=lorenzdata(year,zstart,cpi,includebusiness);
% function []=lorenzdata(year,zstart,cpi,includebusiness);
%
% Find married-joint records with incomes at or above a cutoff z in the
% file x<year>.txt and tabulate zmean/z as in Saez (2001):
%   zmean / z = weighted mean of incomes at or above z, divided by z
%
% Inputs
%   year=2005     year to use --> loads x2005.txt
%   zstart=10000  minimum income to consider (compared after CPI adjustment)
%   cpi = 83      value of CPI in corresponding year (incomes are rescaled
%                 to the year in which CPI=100). Default: 100 (no rescaling)
%   includebusiness = 0    Just wage income unless includebusiness==1.
%                 Default: 0
%
% Outputs (side effects only; nothing is returned)
%   - Prints a table with columns: Percentile, Income (z), zmean/z,
%     Obs (number of records at or above the cutoff), TotWeight.
%   - Plots zmean/z against percentile (figure 1) and log10(z) (figure 2).
%   - Saves the table as ASCII to x<year>Lorenz.asc in the current folder.
%
% Notes
%   - Percentiles are positions in the sorted list of RECORDS (unweighted);
%     the means themselves use the sample weights.
%   - Column 1 of the data is treated as the sample weight and column 2 as
%     filing status (2 = married filing jointly).
%   - cshow is a display helper defined outside this file.
%
%  09/23/16: Update includebusiness to Piketty-Saez "entrepreneurial income"
%       "data11+data17+data21+data75+data79" = Wages/Salaries + Entrepreneurial Income

% Use nargin rather than exist('name'): without the 'var' qualifier, exist
% also matches files/functions on the path, which would skip the default.
if nargin<4 || isempty(includebusiness); includebusiness=0; end;
if nargin<3 || isempty(cpi); cpi=100; end;

% Column indices into the data matrix
wagevar=11;   % Wage and salaries
entrevar1=17; %   'data17 business net income (schedule C)'
entrevar2=21; %   'data21 farm income'
entrevar3=75; %   'data75 net partnership income or loss '
entrevar4=79; %   'data79 small business corp total income or loss'
entrevars=[entrevar1 entrevar2 entrevar3 entrevar4];

NumtoShow=50; % number of records to preview

disp ' ';

% Load the raw text file directly into a matrix (no eval needed)
fname=['x' num2str(year)];
data=load(['/home/data/soi/taxsim/txt/' fname '.txt'],'-ascii');

% Data before cleaning (cap the preview at the number of rows available)
recs=(1:min(NumtoShow,size(data,1)))';
entreincome=sum(data(:,entrevars),2); % row-wise sum of entrepreneurial items
fmt='%4.0f %8.0f %8.0f %15.0f %15.0f %10.0f';
cshow(' ',[recs data(recs,[1 2 wagevar]) entreincome(recs)],fmt,'Line Weight Married Wage Business');

% Clean out if not married filing jointly
married=data(:,2); % married=2 for married-joint filing
keep=(married==2);

disp ' ';
if ~includebusiness;
    disp 'Income Measure = Wages and Salaries';
    income=data(keep,wagevar);
else;
    disp 'Income Measure = Wages/Salaries + Piketty-Saez Entrepreneurial Income';
    disp '      data11+data17+data21+data75+data79 = Wages/Salaries + Entrepreneurial Income'
    disp 'data17 business net income (schedule C)'
    disp 'data21 farm income'
    disp 'data75 net partnership income or loss '
    disp 'data79 small business corp total income or loss'
    income=data(keep,wagevar)+entreincome(keep);
end;

weight=data(keep,1); % sample weight -- this many "people" with this observation
N=length(income);
fprintf('There are %12.0f records in total\n',N);
if N==0;
    error('lorenzdata:noRecords','No married-joint records found in %s.txt',fname);
end;

% First, just show some data
nshow=min(NumtoShow,N);
fprintf('Here is what the first %d records look like before cpi correction\n',nshow);
cshow(' ',[weight(1:nshow) income(1:nshow)],'%8.0f %12.0f','Weight Income');

% CPI correction
income=income/cpi*100;

% Show the same records again after the correction
fprintf('Here is what the first %d records look like after cpi correction\n',nshow);
cshow(' ',[weight(1:nshow) income(1:nshow)],'%8.0f %12.0f','Weight Income');


% Now sort the data
[ysort,order]=sort(income);
wsort=weight(order); % sorted weight


% Locate the first sorted record above zstart and convert to a percentile
above=find(ysort>zstart);
if isempty(above);
    error('lorenzdata:noIncomeAboveZstart','No incomes exceed zstart=%g',zstart);
end;
pstart=floor(above(1)/N*100);
fprintf('The value %8.0f corresponds to the %5.2f percentile\n',[zstart pstart]);

Prctiles=[pstart:.5:99 99.1:.01:99.99];
Results=zeros(length(Prctiles),5); % preallocate: [p z zmean/z nobs totpeople]
% Step up by percentile rather than income -- program will run a lot faster!
disp ' ';
disp 'Percentile     Income    zmean/z     Obs      TotWeight';
disp '-------------------------------------------------------';
for k=1:length(Prctiles);
    p=Prctiles(k);
    % Clamp to 1: floor() gives 0 when p is 0 (or N is small), and 0 is not
    % a valid index.
    i=max(1,floor(p/100*N));
    tailweight=wsort(i:end);
    totpeople=sum(tailweight);
    totincome=sum(ysort(i:end).*tailweight);
    zmean=totincome/totpeople;
    z=ysort(i);
    nobs=N-i+1; % records i..N inclusive, matching the sums above
    Results(k,:)=[p z zmean/z nobs totpeople];
    fprintf('%5.2f %15.0f %10.6f %8.0f %12.0f\n',Results(k,:));
end;
disp 'For our last cutoff, here are the number of incomes:';
fprintf('Number of observations: %6.0f      Number of people(by weight): %6.0f\n',[nobs totpeople]);

pctile=Results(:,1);
z=Results(:,2);
zmeanz=Results(:,3);

figure(1); clf;
plot(pctile,zmeanz);
xlabel('Percentile of Married-Joint Incomes');
ylabel('mean(y | y>z) / z');

figure(2); clf;
plot(log10(z),zmeanz);
xlabel('log10(z)');
ylabel('mean(y | y>z) / z');

% Write the table next to the caller's working directory as plain ASCII
save([fname 'Lorenz.asc'],'Results','-ascii');
