@inproceedings{fleming2023medalign,
      title={{MedAlign}: A Clinician-Generated Dataset for Instruction Following with Electronic Medical Records},
      author={Scott L. Fleming and Alejandro Lozano and William J. Haberkorn and Jenelle A. Jindal and Eduardo P. Reis and Rahul Thapa and Louis Blankemeier and Julian Z. Genkins and Ethan Steinberg and Ashwin Nayak and Birju S. Patel and Chia-Chun Chiang and Alison Callahan and Zepeng Huo and Sergios Gatidis and Scott J. Adams and Oluseyi Fayanju and Shreya J. Shah and Thomas Savage and Ethan Goh and Akshay S. Chaudhari and Nima Aghaeepour and Christopher Sharp and Michael A. Pfeffer and Percy Liang and Jonathan H. Chen and Keith E. Morse and †Emma P. Brunskill and †Jason A. Fries and †Nigam H. Shah},
      year={2024},
      eprint={2308.14089},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
      url={https://arxiv.org/abs/2308.14089}
}

@misc{guo2023multicenter,
  author        = {*Lin Lawrence Guo and *Jason Fries and Ethan Steinberg and Scott Lanyon Fleming and Keith Morse and Catherine Aftandilian and Jose Posada and Nigam Shah and Lillian Sung},
  title         = {A Multi-Center Study on the Adaptability of a Shared Foundation Model for Electronic Health Records},
  journal       = {Preprint},
  year          = {2023},
  eprint        = {2311.11483},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2311.11483}
}

@article{lemmon2023self,
  author    = {Lemmon, Joshua and Guo, Lin Lawrence and Steinberg, Ethan and Morse, Keith E and Fleming, Scott Lanyon and Aftandilian, Catherine and Pfohl, Stephen R and Posada, Jose D and Shah, Nigam and Fries, Jason Alan and Sung, Lillian},
  title     = {Self-supervised machine learning using adult inpatient data produces effective models for pediatric clinical prediction tasks},
  journal   = {Journal of the American Medical Informatics Association},
  publisher = {Oxford University Press},
  volume    = {30},
  number    = {12},
  pages     = {2004--2011},
  year      = {2023},
  url       = {https://academic.oup.com/jamia/article-abstract/30/12/2004/7252874}
}

@inproceedings{huang2023inspect,
  author    = {Shih-Cheng Huang and Zepeng Huo and Ethan Steinberg and Chia-Chun Chiang and Matthew P. Lungren and Curtis Langlotz and Serena Yeung and Nigam Shah and Jason Alan Fries},
  title     = {{INSPECT}: A Multimodal Dataset for Pulmonary Embolism Diagnosis and Prognosis},
  booktitle = {Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
  year      = {2023},
  url       = {https://openreview.net/forum?id=3sRR2u72oQ}
}

@inproceedings{wornow2023ehrshot,
  author    = {Michael Wornow and Rahul Thapa and Ethan Steinberg and †Jason Fries and †Nigam Shah},
  title     = {{EHRSHOT}: An {EHR} Benchmark for Few-Shot Evaluation of Foundation Models},
  booktitle = {Thirty-seventh Conference on Neural Information Processing Systems Datasets and Benchmarks Track},
  year      = {2023},
  url       = {https://openreview.net/forum?id=CsXC6IcdwI}
}

@misc{steinberg2023motor,
      title={{MOTOR}: A Time-To-Event Foundation Model For Structured Medical Records},
      author={*Ethan Steinberg and *Jason Alan Fries and Yizhe Xu and Nigam Shah},
      year={2023},
      journal={Preprint},
      eprint={2301.03150},
      archivePrefix={arXiv},
      primaryClass={cs.LG},
      url={https://arxiv.org/abs/2301.03150}
}

@article{wornow2023shaky,
  author    = {Wornow, Michael and Xu, Yizhe and Thapa, Rahul and Patel, Birju and Steinberg, Ethan and Fleming, Scott and Pfeffer, Michael A and Fries, Jason and Shah, Nigam H},
  title     = {The shaky foundations of large language models and foundation models for electronic health records},
  journal   = {npj Digital Medicine},
  publisher = {Nature Publishing Group UK London},
  volume    = {6},
  number    = {1},
  pages     = {135},
  year      = {2023},
  url       = {https://www.nature.com/articles/s41746-023-00879-8}
}

@article{guo2023ehr,
  title={{EHR} foundation models improve robustness in the presence of temporal distribution shift},
  author={Guo, Lin Lawrence and Steinberg, Ethan and Fleming, Scott Lanyon and Posada, Jose and Lemmon, Joshua and Pfohl, Stephen R and Shah, Nigam and Fries, †Jason Alan and Sung, †Lillian},
  journal={Scientific Reports},
  volume={13},
  number={1},
  pages={3767},
  year={2023},
  url={https://www.nature.com/articles/s41598-023-30820-8},
  publisher={Nature Publishing Group UK London}
}

@inproceedings{NEURIPS2022_a583d219,
 author = {Fries, *Jason and Weber, *Leon and Seelam, *Natasha and Altay, *Gabriel and Datta, Debajyoti and Garda, Samuele and Kang, Sunny and Su, Rosaline and Kusa, Wojciech and Cahyawijaya, Samuel and Barth, Fabio and Ott, Simon and Samwald, Matthias and Bach, Stephen and Biderman, Stella and S{\"a}nger, Mario and Wang, Bo and Callahan, Alison and Le{\'o}n Peri{\~n}{\'a}n, Daniel and Gigant, Th{\'e}o and Haller, Patrick and Chim, Jenny and Posada, Jose and Giorgi, John and Sivaraman, Karthik Rangasai and P{\`a}mies, Marc and Nezhurina, Marianna and Martin, Robert and Cullan, Michael and Freidank, Moritz and Dahlberg, Nathan and Mishra, Shubhanshu and Bose, Shamik and Broad, Nicholas and Labrak, Yanis and Deshmukh, Shlok and Kiblawi, Sid and Singh, Ayush and Vu, Minh Chien and Neeraj, Trishala and Golde, Jonas and Villanova del Moral, Albert and Beilharz, Benjamin},
 booktitle = {Advances in Neural Information Processing Systems},
 editor = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. Oh},
 pages = {25792--25806},
 publisher = {Curran Associates, Inc.},
 title = {{BigBio}: A Framework for Data-Centric Biomedical Natural Language Processing},
 url = {https://proceedings.neurips.cc/paper_files/paper/2022/file/a583d2197eafc4afdd41f5b8765555c5-Paper-Datasets_and_Benchmarks.pdf},
 volume = {35},
 year = {2022}
}

@article{smith2022language,
  author  = {Smith, *Ryan and Fries, *Jason Alan and Hancock, Braden and Bach, Stephen H},
  title   = {Language Models in the Loop: Incorporating Prompting into Weak Supervision},
  journal = {ACM/IMS Journal of Data Science},
  year    = {2023},
  url     = {https://jds.acm.org/files/JDS_Issue2_Paper2.pdf}
}

@inproceedings{blankemeier-etal-2023-efficient,
  author    = {Blankemeier, Louis and Fries, Jason and Tinn, Robert and Preston, Joseph and Shah, Nigam and Chaudhari, Akshay},
  title     = {Efficient Diagnosis Assignment Using Unstructured Clinical Notes},
  editor    = {Rogers, Anna and Boyd-Graber, Jordan and Okazaki, Naoaki},
  booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  publisher = {Association for Computational Linguistics},
  address   = {Toronto, Canada},
  month     = jul,
  year      = {2023},
  pages     = {485--494},
  doi       = {10.18653/v1/2023.acl-short.42},
  url       = {https://aclanthology.org/2023.acl-short.42},
  abstract  = {Electronic phenotyping entails using electronic health records (EHRs) to identify patients with specific health outcomes and determine when those outcomes occurred. Unstructured clinical notes, which contain a vast amount of information, are a valuable resource for electronic phenotyping. However, traditional methods, such as rule-based labeling functions or neural networks, require significant manual effort to tune and may not generalize well to multiple indications. To address these challenges, we propose \textit{HyDE} (hybrid diagnosis extractor). HyDE is a simple framework for electronic phenotyping that integrates labeling functions and a disease-agnostic neural network to assign diagnoses to patients. By training HyDE{'}s model to correct predictions made by labeling functions, we are able to disambiguate hypertension true positives and false positives with a supervised area under the precision-recall curve (AUPRC) of 0.85. We extend this hypertension-trained model to zero-shot evaluation of four other diseases, generating AUPRC values ranging from 0.82 - 0.95 and outperforming a labeling function baseline by 44 points in F1 score and a Word2Vec baseline by 24 points in F1 score on average. Furthermore, we demonstrate a speedup of {\textgreater}4x by pruning the length of inputs into our language model to {\textasciitilde}2.3{\%} of the full clinical notes, with negligible impact to the AUPRC. HyDE has the potential to improve the efficiency and efficacy of interpreting large-scale unstructured clinical notes for accurate EHR phenotyping.}
}

@article{miner2022computational,
  author    = {Miner, Adam S and Fleming, Scott L and Haque, Albert and Fries, Jason A and Althoff, Tim and Wilfley, Denise E and Agras, W Stewart and Milstein, Arnold and Hancock, Jeff and Asch, Steven M and others},
  title     = {A computational approach to measure the linguistic characteristics of psychotherapy timing, responsiveness, and consistency},
  journal   = {npj Mental Health Research},
  publisher = {Nature Publishing Group UK London},
  volume    = {1},
  number    = {1},
  pages     = {19},
  year      = {2022},
  url       = {https://www.nature.com/articles/s44184-022-00020-9}
}

@inproceedings{fries2022dataset,
  author    = {Jason Fries and Natasha Seelam and Gabriel Altay and Leon Weber and Myungsun Kang and Debajyoti Datta and Ruisi Su and Samuele Garda and Bo Wang and Simon Ott and Matthias Samwald and Wojciech Kusa},
  title     = {Dataset Debt in Biomedical Language Modeling},
  booktitle = {Challenges {\&} Perspectives in Creating Large Language Models},
  year      = {2022},
  url       = {https://openreview.net/forum?id=HRfzInfr8Z9}
}

@article{guo2022evaluation,
  title={Evaluation of domain generalization and adaptation on improving model robustness to temporal dataset shift in clinical medicine},
  author={Guo, Lin Lawrence and Pfohl, Stephen R and Fries, Jason and Johnson, Alistair EW and Posada, Jose and Aftandilian, Catherine and Shah, Nigam and Sung, Lillian},
  journal={Scientific Reports},
  volume={12},
  number={1},
  pages={1--10},
  year={2022},
  url={https://www.nature.com/articles/s41598-022-06484-1},
  doi={10.1038/s41598-022-06484-1},
  publisher={Nature Publishing Group}
}

@inproceedings{bach-etal-2022-promptsource,
    title = "{PromptSource}: An Integrated Development Environment and Repository for Natural Language Prompts",
    author = "Bach, Stephen  and
      Sanh, Victor  and
      Yong, Zheng Xin  and
      Webson, Albert  and
      Raffel, Colin  and
      Nayak, Nihal V.  and
      Sharma, Abheesht  and
      Kim, Taewoon  and
      Bari, M Saiful  and
      Fevry, Thibault  and
      Alyafeai, Zaid  and
      Dey, Manan  and
      Santilli, Andrea  and
      Sun, Zhiqing  and
      Ben-david, Srulik  and
      Xu, Canwen  and
      Chhablani, Gunjan  and
      Wang, Han  and
      Fries, Jason  and
      Al-shaibani, Maged  and
      Sharma, Shanya  and
      Thakker, Urmish  and
      Almubarak, Khalid  and
      Tang, Xiangru  and
      Radev, Dragomir  and
      Jiang, Mike Tian-jian  and
      Rush, Alexander",
    booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics: System Demonstrations",
    month = may,
    year = "2022",
    address = "Dublin, Ireland",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2022.acl-demo.9",
    doi = "10.18653/v1/2022.acl-demo.9",
    pages = "93--104",
    abstract = "PromptSource is a system for creating, sharing, and using natural language prompts. Prompts are functions that map an example from a dataset to a natural language input and target output. Using prompts to train and query language models is an emerging area in NLP that requires new tools that let users develop and refine these prompts collaboratively. PromptSource addresses the emergent challenges in this new setting with (1) a templating language for defining data-linked prompts, (2) an interface that lets users quickly iterate on prompt development by observing outputs of their prompts on many examples, and (3) a community-driven set of guidelines for contributing new prompts to a common pool. Over 2,000 prompts for roughly 170 datasets are already available in PromptSource. PromptSource is available at https://github.com/bigscience-workshop/promptsource.",
}

@article{zhou2021radfusion,
  title={{RadFusion}: Benchmarking Performance and Fairness for Multimodal Pulmonary Embolism Detection from {CT} and {EHR}},
  author={Zhou, Yuyin and Huang, Shih-Cheng and Fries, Jason and Youssef, Alaa and Amrhein, Timothy J and Chang, Marcello and Banerjee, Imon and Rubin, Daniel and Xing, Lei and Shah, Nigam and others},
  journal={arXiv preprint arXiv:2111.11665},
  eprint={2111.11665},
  archivePrefix={arXiv},
  url={https://arxiv.org/abs/2111.11665},
  year={2021}
}

@inproceedings{
sanh2022multitask,
title={Multitask Prompted Training Enables Zero-Shot Task Generalization},
author={Victor Sanh and Albert Webson and Colin Raffel and Stephen Bach and Lintang Sutawika and Zaid Alyafeai and Antoine Chaffin and Arnaud Stiegler and Arun Raja and Manan Dey and M Saiful Bari and Canwen Xu and Urmish Thakker and Shanya Sharma and Eliza Szczechla and Taewoon Kim and Gunjan Chhablani and Nihal Nayak and Debajyoti Datta and Jonathan Chang and Mike Tian-Jian Jiang and Han Wang and Matteo Manica and Sheng Shen and Zheng Xin Yong and Harshit Pandey and Rachel Bawden and Thomas Wang and Trishala Neeraj and Jos Rozen and Abheesht Sharma and Andrea Santilli and Thibault Fevry and Jason Fries and Ryan Teehan and Teven Le Scao and Stella Biderman and Leo Gao and Thomas Wolf and Alexander M Rush},
booktitle={International Conference on Learning Representations},
year={2022},
url={https://openreview.net/forum?id=9Vrb9D0WI4}
}

@article{guo2021systematic,
  author    = {Guo, Lin Lawrence and Pfohl, Stephen R and Fries, Jason and Posada, Jose and Fleming, Scott Lanyon and Aftandilian, Catherine and Shah, Nigam and Sung, Lillian},
  title     = {Systematic Review of Approaches to Preserve Machine Learning Performance in the Presence of Temporal Dataset Shift in Clinical Medicine},
  journal   = {Applied Clinical Informatics},
  publisher = {Georg Thieme Verlag KG},
  volume    = {12},
  number    = {04},
  pages     = {808--815},
  year      = {2021},
  url       = {https://pubmed.ncbi.nlm.nih.gov/34470057/}
}

@article{fries2021ontology,
  title={Ontology-driven weak supervision for clinical entity classification in electronic health records},
  author={Fries, Jason and Steinberg, Ethan and Khattar, Saelig and Fleming, Scott L and Posada, Jose and Callahan, Alison and Shah, Nigam H},
  journal={Nature Communications},
  volume={12},
  number={1},
  pages={1--11},
  year={2021},
  url={https://www.nature.com/articles/s41467-021-22328-4},
  doi={10.1038/s41467-021-22328-4},
  publisher={Nature Publishing Group}
}

@article{STEINBERG2021103637,
title = {Language models are an effective representation learning technique for electronic health record data},
journal = {Journal of Biomedical Informatics},
volume = {113},
pages = {103637},
year = {2021},
issn = {1532-0464},
doi = {10.1016/j.jbi.2020.103637},
url = {https://www.sciencedirect.com/science/article/pii/S1532046420302653},
author = {Ethan Steinberg and Ken Jung and Jason Fries and Conor K. Corbin and Stephen R. Pfohl and Nigam H. Shah},
keywords = {Electronic health record, Representation learning, Transfer learning, Risk stratification, Machine learning},
abstract = {Widespread adoption of electronic health records (EHRs) has fueled the development of using machine learning to build prediction models for various clinical outcomes. However, this process is often constrained by having a relatively small number of patient records for training the model. We demonstrate that using patient representation schemes inspired from techniques in natural language processing can increase the accuracy of clinical prediction models by transferring information learned from the entire patient population to the task of training a specific model, where only a subset of the population is relevant. Such patient representation schemes enable a 3.5{\%} mean improvement in AUROC on five prediction tasks compared to standard baselines, with the average improvement rising to 19{\%} when only a small number of patient records are available for training the clinical prediction model.}
}

@article{callahan2020estimating,
  title={Estimating the efficacy of symptom-based screening for {COVID-19}},
  author={Callahan, Alison and Steinberg, Ethan and Fries, Jason and Gombar, Saurabh and Patel, Birju and Corbin, Conor K and Shah, Nigam H},
  journal={npj Digital Medicine},
  volume={3},
  number={1},
  pages={1--3},
  year={2020},
  url={https://www.nature.com/articles/s41746-020-0300-0},
  doi={10.1038/s41746-020-0300-0},
  publisher={Nature Publishing Group}
}

@article{miner2020assessing,
  title={Assessing the accuracy of automatic speech recognition for psychotherapy},
  author={Miner, Adam S and Haque, Albert and Fries, Jason and Fleming, Scott L and Wilfley, Denise E and Wilson, G Terence and Milstein, Arnold and Jurafsky, Dan and Arnow, Bruce A and Agras, W Stewart and others},
  journal={npj Digital Medicine},
  volume={3},
  number={1},
  pages={1--8},
  year={2020},
  url={https://www.nature.com/articles/s41746-020-0285-8},
  publisher={Nature Publishing Group},
  doi={10.1038/s41746-020-0285-8}
}

@article{callahan2019medical,
  title={Medical device surveillance with electronic health records},
  author={Callahan, Alison and Fries, Jason and R{\'e}, Christopher and Huddleston, James I and Giori, Nicholas J and Delp, Scott and Shah, Nigam H},
  journal={npj Digital Medicine},
  volume={2},
  number={1},
  pages={1--10},
  year={2019},
  url={https://www.nature.com/articles/s41746-019-0168-z},
  publisher={Nature Publishing Group},
  doi={10.1038/s41746-019-0168-z}
}

@inproceedings{ratner2017snorkel,
  title={Snorkel: Rapid training data creation with weak supervision},
  author={Ratner, Alexander and Bach, Stephen H and Ehrenberg, Henry and Fries, Jason and Wu, Sen and R{\'e}, Christopher},
  booktitle={Proceedings of the VLDB Endowment. International Conference on Very Large Data Bases},
  volume={11},
  number={3},
  pages={269},
  year={2017},
  url={https://dl.acm.org/doi/10.14778/3157794.3157797},
  organization={NIH Public Access},
  doi={10.14778/3157794.3157797}
}

@inproceedings{DBLP:conf/nips/VarmaSSFFKRXFPR19,
  author={Paroma Varma and Frederic Sala and Shiori Sagawa and Jason Fries and Daniel Y. Fu and Saelig Khattar and Ashwini Ramamoorthy and Ke Xiao and Kayvon Fatahalian and James Priest and Christopher R{\'e}},
  title={Multi-Resolution Weak Supervision for Sequential Data},
  year={2019},
  cdate={1546300800000},
  pages={192--203},
  url={http://papers.nips.cc/paper/8313-multi-resolution-weak-supervision-for-sequential-data},
  booktitle={NeurIPS},
  crossref={conf/nips/2019}
}

@article{fries2019weakly,
  title={Weakly supervised classification of aortic valve malformations using unlabeled cardiac {MRI} sequences},
  author={Fries, Jason and Varma, Paroma and Chen, Vincent S and Xiao, Ke and Tejeda, Heliodoro and Saha, Priyanka and Dunnmon, Jared and Chubb, Henry and Maskatia, Shiraz and Fiterau, Madalina and others},
  journal={Nature Communications},
  volume={10},
  number={1},
  pages={1--10},
  year={2019},
  url={https://www.nature.com/articles/s41467-019-11012-3},
  publisher={Nature Publishing Group},
  doi={10.1038/s41467-019-11012-3}
}

@inproceedings{fries-2016-brundlefly,
    title = "Brundlefly at {SemEval-2016} Task 12: Recurrent Neural Networks vs. Joint Inference for Clinical Temporal Information Extraction",
    author = "Fries, Jason",
    booktitle = "Proceedings of the 10th International Workshop on Semantic Evaluation ({SemEval-2016})",
    month = jun,
    year = "2016",
    address = "San Diego, California",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/S16-1198",
    doi = "10.18653/v1/S16-1198",
    pages = "1274--1279",
}

@article{fries2017swellshark,
  title={Swellshark: A generative model for biomedical named entity recognition without labeled data},
  author={Fries, Jason and Wu, Sen and Ratner, Alex and R{\'e}, Christopher},
  journal={arXiv preprint arXiv:1704.06360},
  eprint={1704.06360},
  archivePrefix={arXiv},
  url={https://arxiv.org/abs/1704.06360},
  year={2017}
}

Jason Alan Fries

I'm currently a research scientist in the Shah Lab at Stanford University. Previously I was a CS postdoc in Stanford's Mobilize Center, mentored by Chris Ré and Scott Delp.

I'm interested in tools and methods that enable domain experts to rapidly build and modify machine learning models. I'm most passionate about medical applications of machine learning, where obtaining large-scale, expert-labeled training data is a significant challenge. My research focuses on weakly supervised machine learning, training foundation models for medicine, and methods for data-centric AI.

CV Google Scholar ResearchGate GitHub Twitter

Recent Talks

[2023-12-15] Presented "Model Hubs for Medical AI: How far is our Hugging Face Moment?" at the University of Pittsburgh's Health Informatics Grand Rounds [slides]
[2023-06-06] Presented "Generative AI for Electronic Health Records" at the 2023 AIMI Symposium: Generative AI in Health [slides] [video]
[2023-04-13] Presented "Realizing the Promise of Foundation Models in Healthcare" at Stanford's BMIR colloquium [video]
[2023-02-27] Check out our Stanford HAI blog post on the problematic state of medical foundation model evaluation "The Shaky Foundations of Foundation Models in Healthcare"
[2022-12-15] Check out our Stanford HAI blog post on the promise of foundation models in healthcare "How Foundation Models Can Advance AI in Healthcare"
[2022-12-01] Presented "BigBio: A Framework for Data-Centric Biomedical Natural Language Processing" at NeurIPS 2022
[2022-11-16] Presented "The Road to Data-Centric Machine Learning in Healthcare" at the 2022 INFORMS Annual Meeting
[2022-08-03] Took part in a great panel discussion with Steve Bach, Fred Sala and Fait Poms at The Future of Data-Centric AI Workshop
[2022-05-25] Presented at the National Academies of Sciences, Engineering, and Medicine's workshop on Artificial Intelligence and Open Data Practices in Chemical Hazard Assessment
[2022-04-02] Official launch of the BigScience Biomedical Hackathon! We're implementing over 150 datasets in a common framework for use in large-scale language modeling.
[2021-08-07] Moderating a breakout session on "Few and Zero-shot Learning in Medicine" at Machine Learning for Health Care 2021
[2021-08-03] Gave a talk at Stanford's AIMI Symposium 2021 on "Data-centric Medical AI with Weak Supervision". Video Recording Coming Soon!
[2021-06-16] I was a guest on Snorkel AI's Science Talks, where I discuss some of the early history of Snorkel and my work on weak supervision for medicine Blog Video
[2021-04-15] Gave a talk to Stanford's MedAI Group on weakly supervised learning in medicine. Video

Projects

Healthcare Foundation Models: Electronic health records (EHR) capture complex, heterogeneous data about patient health, including structured medical codes, unstructured patient notes, imaging and more. EHR foundation models offer a promising path towards combining these data streams to improve model performance and adaptability, and to facilitate novel approaches for clinician-in-the-loop AI systems.

Large Language Models: The BigScience 2021 Workshop: The Summer of Language Models is an exciting international collaboration aimed at training a very large and open language model for the research community. I'm co-chair of the biomedical working group and participant in the modeling working group. We are currently putting together a collection of over 150 expert-labeled biomedical datasets tailored for use in large-scale language modeling.

Weak Supervision: I am a co-developer and contributor to Stanford's weak supervision framework Snorkel. I have papers on weakly supervised biomedical concept tagging, machine reading in the electronic health record (EHR), and classifying rare aortic valve diseases in cardiac MRI videos from the UK Biobank, an open, population-scale health dataset.

Community

Area chair for Machine Learning for Healthcare (MLHC) 2019-2021
Co-organizer for Machine Learning for Health (ML4H) @ NeurIPS 2016-2018
Co-organizer Learning to Run Challenge @ NeurIPS 2017

Select Publications

*co-first authorship †co-senior authorship