Useful Data Management Queries

Shared Employee_Ids
select, a.person_id, concat(group_concat(distinct a.emplid)) as emplid1,, c.person_id, concat(group_concat(distinct c.emplid)) as emplid2 from _name_source_lookup a join (select emplid, max(person_id) as id1, min(person_id) as id2 from _name_source_lookup where emplid is not null and person_id != 0 group by emplid having count(distinct person_id) > 1) b on a.person_id = id1 join _name_source_lookup c on c.person_id = id2 group by a.person_id order by a.emplid

Duplicate Names
SELECT name, person_id, count(*), max(person_id), group_concat(distinct sourcetable) FROM _name_source_lookup n where duplicate_name = 1 group by name, person_id

People w/ >1 emplid
select group_concat(distinct name), person_id, concat(group_concat(distinct emplid)), count(distinct emplid) from _name_source_lookup where person_id != 0 group by person_id having count(distinct emplid) > 1

Gender Changes
select * from _name_source_lookup a group by person_id having count(distinct gender) > 1

Orphaned Person_ids
select * from people a left join _name_source_lookup b using (person_id) where b.person_id is null order by a.person_id

Tables containing people that only appear in that table
select sourcetable, sourcefield, count(*) as people, sum(num) as occurences from (select person_id, name, emplid, sourcetable, sourcefield, count(*) as num from _name_source_lookup group by person_id having count(distinct sourcetable, sourcefield) = 1) a group by sourcetable, sourcefield order by count(*) desc

All data sources with counts and whether they have emplids
select sourcetable, sourcefield, emplid is not null as has_emplid, count(distinct person_id) as people, count(*) as occurences from _name_source_lookup group by sourcetable, sourcefield, EMPLID is null order by count(distinct person_id) desc

People with low average_name_scores
SELECT a.person_id,, group_concat(distinct, a.average_name_score FROM _name_source_lookup a join _name_source_lookup b using(person_id) where a.average_name_score is not null and a.average_name_score <40 and a.person_id != 0 group by a.person_id, order by a.average_name_score