@comment{
  Conference-paper record for the FG-CXR dataset paper (ACCV 2024, published
  in the LNCS series, pages 71--88).
  Case-sensitive tokens in title/booktitle are brace-protected so that
  sentence-casing bibliography styles do not lowercase them.
  NOTE(review): the author "Patel Brijesh" is in "First Last" form, so BibTeX
  takes "Brijesh" as the surname -- confirm the intended name order.
  NOTE(review): address holds a country, not a publisher city -- confirm
  against the publisher record before relying on it.
}
@inproceedings{35e3572066d046f291191052972739c6,
  title     = {{FG-CXR}: A Radiologist-Aligned Gaze Dataset for Enhancing Interpretability in Chest {X-Ray} Report Generation},
  abstract  = {Developing an interpretable system for generating reports in chest X-ray (CXR) analysis is becoming increasingly crucial in Computer-aided Diagnosis (CAD) systems, enabling radiologists to comprehend the decisions made by these systems. Despite the growth of diverse datasets and methods focusing on report generation, there remains a notable gap in how closely these models{\textquoteright}s generated reports align with the interpretations of real radiologists. In this study, we tackle this challenge by initially introducing Fine-Grained CXR (FG-CXR) dataset, which provides fine-grained paired information between the captions generated by radiologists and the corresponding gaze attention heatmaps for each anatomy. Unlike existing datasets that include a raw sequence of gaze alongside a report, with significant misalignment between gaze location and report content, our FG-CXR dataset offers a more grained alignment between gaze attention and diagnosis transcript. Furthermore, our analysis reveals that simply applying black-box image captioning methods to generate reports cannot adequately explain which information in CXR is utilized and how long needs to attend to accurately generate reports. Consequently, we propose a novel explainable radiologist{\textquoteright}s attention generator network (Gen-XAI) that mimics the diagnosis process of radiologists, explicitly constraining its output to closely align with both radiologist{\textquoteright}s gaze attention and transcript. Finally, we perform extensive experiments to illustrate the effectiveness of our method. Our datasets and checkpoint is available at https://github.com/UARK-AICV/FG-CXR.},
  keywords  = {Chest X-ray, CXR Dataset, Deep Learning, Interpretability, Medical Imaging, Report Generation},
  author    = {Pham, {Trong Thang} and Ho, {Ngoc Vuong} and Bui, {Nhat Tan} and Thinh Phan and Patel Brijesh and Donald Adjeroh and Gianfranco Doretto and Anh Nguyen and Wu, {Carol C.} and Hien Nguyen and Ngan Le},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2025.; 17th Asian Conference on Computer Vision, ACCV 2024 ; Conference date: 08-12-2024 Through 12-12-2024},
  year      = {2025},
  doi       = {10.1007/978-981-96-0960-4_5},
  language  = {English (US)},
  isbn      = {9789819609598},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  pages     = {71--88},
  editor    = {Minsu Cho and Ivan Laptev and Du Tran and Angela Yao and Hongbin Zha},
  booktitle = {Computer Vision -- {ACCV} 2024 - 17th Asian Conference on Computer Vision, Proceedings},
  address   = {Germany},
}