Data management - Adding new Faculty_years data

* import faculty_appointments_07 into temporary table add the columns school_id, department_id, faculty_id, person_id. use greg's import_csv script

* get new department codes and insert into dept_codes table

update DEPT_CODES using -
insert into dept_codes select null, dept, fyr, dptname, null from temp_fac_years group by dept, fyr;

do same check for dept1, dept2, dept3

update dept_codes A join departments_aliases B on A.dept_desc = B.alias set A.department_id = B.department_id where A.fiscal_year in (2009, 2010);

* insert new department_ids into department_codes using department_aliases
* change admin and tenured fields to be 1 or 0
* change hire and appt dates to actual date strings: update faculty_appointments_raw_07 set appt_date = str_to_date(appt_date, '%m/%d/%y')
* fix the bad appt and hire dates: update faculty_appointments_raw_07 set appt_date = (appt_date - interval 100 year) where appt_date > 2009
* truncate the gender field: update faculty_appointments_raw_07 set gender=substr(gender, 1, 1)

* update school_id
- update temp_fac_years A join school_aliases B on A.schname = B.alias set A.school_id = B.school_id;
- select * from temp_fac_years where school_id = ''
- manually fix (update school_aliases) the schools which could not be matched

* update department_ids
- update temp_fac_years A join departments_aliases B on A.dptname = B.alias set A.department_id = B.department_id;
- select * from temp_fac_years where department_id = ''
- manually fix (update school_aliases) the schools which could not be matched

* update person_id
- update temp_fac_years a join _name_source_lookup b using(emplid) set a.person_id = b.person_id
- select * from temp_fac_years where department_id = '' - people who are not matched
- match such people using name matching script
- for score > 95 declare a correct match
- check for one id being assigned more than one ids
- need to make sure you're not matching against more than one person - there a field called 'duplicate_name' in _name_source_lookup which will help you here. It's set for every name that appears in the table with two different person_ids
- update temp_fac_years a join _match_fac_years b on upper( = upper(b.name1) set person_id = b.id_b where b.match = 1 and a.person_id = '';
- select * from temp_fac_years where person_id = '' order by str_to_date(hire_date, '%m/%d/%y') asc;

* Now for remaining faculty, that did not match on EMP_ID or by NAME - give them new person_id
- update back these new Faculty's person-IDs in the faculty_raw table insert them into the 'people' table, but yeah. you can just insert the names and/or emplids
- the other fields will get updated by the update_meta script
- insert into people select null, name, null, emplid, null, null, null, 0, 0, null, null, null, null from temp_fac_years a where a.person_id = '' group by emplid;
- update temp_fac_years a join people b on a.emplid = b.employee_id set a.person_id = b.person_id where a.person_id = '' and b.employee_id is not null;

NOW insert the data -
- Add a new field to the faculty_years table - this helps while updating the faculty_appointments table
- alter table minerva.faculty_years add column _input_id int(11);
- insert into faculty_years select null, area, fyr, name, degree, level, tenure, ethnic, gender, degyear, sch, pct,pdept, pjcc, pline, admin, dept, dptname, schname, jccname, jcc, age, status, statusn, contract, totalpct, tier, line, str_to_date(hire_date, '%m/%d/%Y'), str_to_date(appt_start_date, '%m/%d/%Y'),school_id, department_id, person_id, dptname, department_id, null, null, null, id from temp_fac_years
- Repeat this for each of the dept associated with the faculty (MOSTLY 5 times, increment all numbers dept1/tier1/1 etc by 1 each time) -----
- insert into faculty_appointments select null, faculty_year_id, a.person_id, a.dept1, pct1, jcc1, line1, tier1, 1, dept1 from temp_fac_years a join faculty_years on id = _input_id where dept1 != ''
- update department ids in faculty appointments (join in departments aliases and dept_codes)
- you can skip this step by just using the following :
- select null, faculty_year_id, person_id, department_id, pct1+0, jcc1, line1, tier1, 1, dept1 from (select department_id, dept_code from dept_codes group by dept_code) c join (select faculty_year_id, a.person_id, dept1 as _dept_code, pct1, jcc1, line1, tier1, dept1 from temp_fac_years a join faculty_years b on = b._input_id where dept1 != '') as d on c.dept_code = d._dept_code;
- once done remove the temporary column taht was added
- alter table faculty_years drop column _input_id;

- take backup dump of all the tables you end up updating - before UPDATING them on server.
- sqldump command here