--------------------------------------------------------------------------------------

      name:  <unnamed>

       log:  C:\Documents and Settings\Michael Rosenfeld\My Documents\newer web pages\

> soc_meth_proj3\2010_logs\class_ten.log

  log type:  text

 opened on:  25 Feb 2010, 13:15:53

 

*NOTE: This part of the log starts before class. The commented part of the log is below, where I repeat some of these commands.

 

. regress incwage  US_born_pct

 

      Source |       SS       df       MS              Number of obs =      51

-------------+------------------------------           F(  1,    49) =   14.84

       Model |   104897990     1   104897990           Prob > F      =  0.0003

    Residual |   346387525    49  7069133.17           R-squared     =  0.2324

-------------+------------------------------           Adj R-squared =  0.2168

       Total |   451285515    50   9025710.3           Root MSE      =  2658.8

 

------------------------------------------------------------------------------

     incwage |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]

-------------+----------------------------------------------------------------

 US_born_pct |  -24075.79       6250    -3.85   0.000    -36635.63   -11515.94

       _cons |   41551.87   5790.425     7.18   0.000     29915.58    53188.17

------------------------------------------------------------------------------

 

. predict new_M1_predicted

(option xb assumed; fitted values)

 

. gen new_M1_residuals= incwage- new_M1_predicted

 

. twoway (scatter  new_M1_residuals  US_born_pct (mlabel(statefip))

parentheses do not balance

r(198);

 

. twoway (scatter  new_M1_residuals  US_born_pct, mlabel(statefip))

 

. gen abs_new_m1_residual=abs( new_M1_residuals)

 

. gsort - abs_new_m1_residual

 

. list  incwage US_born_pct statefip  new_M1_residuals if _n<10

 

     +-----------------------------------------------------------+

     |     incwage   US_bor~t               statefip   new_M1_~s |

     |-----------------------------------------------------------|

  1. | 24803.26155     .89299            Connecticut    4750.814 |

  2. | 17874.36641    .789128                Florida   -4678.628 |

  3. | 24575.59697    .898431               Maryland    4654.167 |

  4. | 23241.05063    .938505                 Alaska    4284.434 |

  5. | 24481.98743    .879239   District of Columbia    4098.485 |

     |-----------------------------------------------------------|

  6. | 13746.25247    .986109                Montana   -4064.271 |

  7. |    13760.09    .988432          West Virginia   -3994.506 |

  8. | 22967.69873    .935135              Minnesota    3929.941 |

  9. | 15344.73038    .933279             New Mexico   -3737.723 |

     +-----------------------------------------------------------+

 

. summarize  M1_dfbeta

 

    Variable |       Obs        Mean    Std. Dev.       Min        Max

-------------+--------------------------------------------------------

   M1_dfbeta |        51    .0047355    .1740446  -.2193236   .7528799

 

. predict new_dfbeta_M1, dfbeta( US_born_pct)

 

. rvfplot, yline(0) mlabel(statefip)

 

. sort incwage

 

. twoway (scatter incwage US_born_pct, mlabel(statefip)) (scatter  new_M1_predicted US_born_pct, connect(l))

 

. list statefip  new_M1_residuals new_dfbeta_M1

 

     +----------------------------------------------+

     |             statefip   new_M1_~s   new_dfb~1 |

     |----------------------------------------------|

  1. |              Montana   -4064.271   -.2340724 |

  2. |        West Virginia   -3994.506   -.2389347 |

  3. |         North Dakota    -3432.41   -.1760496 |

  4. |             Arkansas   -3270.563   -.1553531 |

  5. |          Mississippi   -2970.973   -.1668143 |

     |----------------------------------------------|

  6. |           New Mexico   -3737.723   -.0297481 |

  7. |         South Dakota   -2093.199   -.1111761 |

  8. |              Alabama   -1900.736    -.096484 |

  9. |            Louisiana   -2009.218   -.0778688 |

 10. |             Kentucky    -1581.19   -.0652206 |

     |----------------------------------------------|

 11. |                Idaho   -2347.806   -.0288855 |

 12. |              Wyoming   -1115.151   -.0584846 |

 13. |             Oklahoma   -1589.112   -.0505706 |

 14. |       South Carolina   -521.8787   -.0260187 |

 15. |             Nebraska   -911.8399   -.0258626 |

     |----------------------------------------------|

 16. |              Florida   -4678.628    .6546844 |

 17. |                Maine   -407.2457    -.014631 |

 18. |              Arizona   -2947.986    .1866927 |

 19. |               Kansas   -1228.321    -.001978 |

 20. |              Vermont   -360.9295   -.0099085 |

     |----------------------------------------------|

 21. |                 Utah   -652.5552   -.0088312 |

 22. |                 Iowa    176.3168    .0050437 |

 23. |         Pennsylvania    266.8622    .0084496 |

 24. |              Indiana    621.9144    .0280446 |

 25. |              Georgia    206.3659    .0043481 |

     |----------------------------------------------|

 26. |               Oregon   -571.6023    .0091564 |

 27. |            Tennessee    1228.311    .0593712 |

 28. |                Texas   -1376.676    .0739929 |

 29. |       North Carolina    797.6165    .0177797 |

 30. |               Hawaii   -2289.112    .2323307 |

     |----------------------------------------------|

 31. |               Nevada   -1607.705    .1416568 |

 32. |                 Ohio    1703.592    .0633283 |

 33. |             Missouri    2073.707    .0769217 |

 34. |         Rhode Island    666.6874   -.0121871 |

 35. |           California   -3396.939    .7651464 |

     |----------------------------------------------|

 36. |             New York   -2126.592    .3214945 |

 37. |            Wisconsin    2491.271    .0759981 |

 38. |             Delaware    2492.276     .028916 |

 39. |           Washington    1925.564   -.0201137 |

 40. |        New Hampshire    2906.576    .0681464 |

     |----------------------------------------------|

 41. |             Virginia    2564.906   -.0285459 |

 42. |             Illinois    2268.349   -.0553845 |

 43. |             Michigan    3495.337    .0563978 |

 44. |            Minnesota    3929.941     .038025 |

 45. |               Alaska    4284.434    .0549285 |

     |----------------------------------------------|

 46. |             Colorado    3557.366   -.1106046 |

 47. |        Massachusetts    2603.065    -.181481 |

 48. | District of Columbia    4098.485   -.1720585 |

 49. |             Maryland    4654.167   -.1125866 |

 50. |          Connecticut    4750.814   -.1393247 |

     |----------------------------------------------|

 51. |           New Jersey    3420.941   -.3097655 |

     +----------------------------------------------+

 

. gen new_abs_dfbeta_M1=abs( new_dfbeta_M1)

 

. gsort - new_abs_dfbeta_M1

 

. list statefip incwage  abs_resid new_dfbeta_M1 if _n<10

 

     +----------------------------------------------------+

     |      statefip       incwage   abs_re~d   new_dfb~1 |

     |----------------------------------------------------|

  1. |    California   20573.98456   4001.504    .7651464 |

  2. |       Florida   17874.36641     4944.9    .6546844 |

  3. |      New York    20716.6877   2424.827    .3214945 |

  4. |    New Jersey   24990.40441   2924.286   -.3097655 |

  5. | West Virginia      13760.09   4161.475   -.2389347 |

     |----------------------------------------------------|

  6. |       Montana   13746.25247   3784.092   -.2340724 |

  7. |        Hawaii   19547.63379   2536.164    .2323307 |

  8. |       Arizona    17986.0649   3372.185    .1866927 |

  9. | Massachusetts   23697.95964   2277.071    -.181481 |

     +----------------------------------------------------+

 

. save "C:\Documents and Settings\Michael Rosenfeld\My Documents\newer web pages\

> soc_meth_proj3\fifty_state_dataset.dta", replace

file C:\Documents and Settings\Michael Rosenfeld\My Documents\newer web pages\soc

> _meth_proj3\fifty_state_dataset.dta saved

 

*OK, HERE IS WHERE CLASS ACTUALLY STARTS, AND THIS IS THE PART OF THE LOG I WILL PUT MORE COMMENTS IN:

 

. twoway (scatter incwage  US_born_pct, mlabel(statefip))

 

*The syntax is scatter Y X, options. In this case the option mlabel(statefip) puts the label stored in statefip next to every state's point.

 

. *It seems like states with more immigration have higher income...

 

. twoway (scatter incwage US_born_pct, mlabel(statefip)) lfit(incwage  US_born_pct )

) required

r(100);

 

. twoway (scatter incwage US_born_pct, mlabel(statefip)) (lfit incwage  US_born_pct)

 

. * lfit draws a linear fit, which is the same as the basic regression line with X predicting Y. A residual is the difference between the actual value (the point) and the predicted value (the line). New Jersey has a positive residual; California has a negative residual. California's income is a little less than our simple model would predict.

 

. *California is going to turn out to be our most influential point in dfbeta terms

 

* Before we can play with the predicted values and residuals, we have to run the regression ourselves.

 

. regress incwage  US_born_pct

 

      Source |       SS       df       MS              Number of obs =      51

-------------+------------------------------           F(  1,    49) =   14.84

       Model |   104897990     1   104897990           Prob > F      =  0.0003

    Residual |   346387525    49  7069133.17           R-squared     =  0.2324

-------------+------------------------------           Adj R-squared =  0.2168

       Total |   451285515    50   9025710.3           Root MSE      =  2658.8

 

------------------------------------------------------------------------------

     incwage |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]

-------------+----------------------------------------------------------------

 US_born_pct |  -24075.79       6250    -3.85   0.000    -36635.63   -11515.94

       _cons |   41551.87   5790.425     7.18   0.000     29915.58    53188.17

------------------------------------------------------------------------------

 

. summarize  US_born_pct

 

    Variable |       Obs        Mean    Std. Dev.       Min        Max

-------------+--------------------------------------------------------

 US_born_pct |        51     .924551    .0601614   .7302337   .9884319

 

. *the variable  US_born_pct would have to be multiplied by 100 to get a real percentage...

 

. predict M1_new_predicted

(option xb assumed; fitted values)

 

. *if we wanted to do the lfit graph ourselves

 

. sort incwage

 

. twoway (scatter incwage US_born_pct, mlabel(statefip)) (scatter  M1_new_predicted US_born_pct, connect(l))

 

*The above is my hand-made lfit plot: the first plot in the command is the simple scatter Y X, and the second is a scatter of PredictedY X with its points connected by a line. Both are drawn on the same axes.

 

. *now generate the residuals from my predicted values

 

. gen M1_new_resid= incwage- M1_new_predicted

 

. twoway (scatter  M1_new_resid  US_born_pct, mlabel(statefip))

 

. *don't forget to print to PDF or save your graphs along the way so you can open them later. This is important. Each new graph you make will erase the previous one unless you print to PDF or save. And remember once you make the graph you can use the graph editor to add text or make other changes…

 

. rvfplot, yline(0) mlabel(statefip)

 

. * rvfplot is Stata's built-in command for graphing residuals versus fitted values (rvf), and it automatically refers to the last regression run. Which point has the largest residual in absolute value?

 

. gen M1_new_resid_abs=abs( M1_new_resid)

 

. gsort - M1_new_resid_abs

 

* First we take the absolute value of our variable, then we use gsort - to sort on it. The minus sign after gsort (before the variable name) tells Stata to sort in descending order on that variable. Then we list the first 9 cases (if _n<10).

 

. list statefip incwage  M1_new_resid M1_new_resid_abs if _n<10

 

     +-----------------------------------------------------------+

     |             statefip       incwage   M1_new~id   M1_new~s |

     |-----------------------------------------------------------|

  1. |          Connecticut   24803.26155    4750.814   4750.814 |

  2. |              Florida   17874.36641   -4678.628   4678.628 |

  3. |             Maryland   24575.59697    4654.167   4654.167 |

  4. |               Alaska   23241.05063    4284.434   4284.434 |

  5. | District of Columbia   24481.98743    4098.485   4098.485 |

     |-----------------------------------------------------------|

  6. |              Montana   13746.25247   -4064.271   4064.271 |

  7. |        West Virginia      13760.09   -3994.506   3994.506 |

  8. |            Minnesota   22967.69873    3929.941   3929.941 |

  9. |           New Mexico   15344.73038   -3737.723   3737.723 |

     +-----------------------------------------------------------+

 

. *_n is just the observation number in the current sort order. After we sorted in descending order on abs(residual), the biggest residuals come first.

 

. *now for our measure of influence, the dfbeta

 

. predict M1_new_dfbetas, dfbeta(US_born_pct)

 

. gen M1_new_dfbetas_abs=abs( M1_new_dfbetas)

 

. gsort - M1_new_dfbetas_abs

 

. list statefip incwage  M1_new_resid  M1_new_dfbetas if _n<10

 

     +-----------------------------------------------------+

     |      statefip       incwage   M1_new~id   M1_new~as |

     |-----------------------------------------------------|

  1. |    California   20573.98456   -3396.939    .7651464 |

  2. |       Florida   17874.36641   -4678.628    .6546844 |

  3. |      New York    20716.6877   -2126.592    .3214945 |

  4. |    New Jersey   24990.40441    3420.941   -.3097655 |

  5. | West Virginia      13760.09   -3994.506   -.2389347 |

     |-----------------------------------------------------|

  6. |       Montana   13746.25247   -4064.271   -.2340724 |

  7. |        Hawaii   19547.63379   -2289.112    .2323307 |

  8. |       Arizona    17986.0649   -2947.986    .1866927 |

  9. | Massachusetts   23697.95964    2603.065    -.181481 |

     +-----------------------------------------------------+

 

. save "C:\Documents and Settings\Michael Rosenfeld\My Documents\newer web pages\s

> oc_meth_proj3\fifty_state_dataset.dta", replace

file C:\Documents and Settings\Michael Rosenfeld\My Documents\newer web pages\soc_

> meth_proj3\fifty_state_dataset.dta saved

 

. save "C:\Documents and Settings\Michael Rosenfeld\My Documents\newer web pages\

> soc_meth_proj3\fifty_state_dataset.dta", replace

file C:\Documents and Settings\Michael Rosenfeld\My Documents\newer web pages\soc

> _meth_proj3\fifty_state_dataset.dta saved

 

. *California and Florida are the two states that have the most influence over the slope of the regression line; they are the most influential points. They happen to be outliers in X (look at the graphs), that is, they are outliers in pct US born, and that is what makes them influential.

 

. exit, clear