@article{Leal508,
  author = {Jeffrey Leal and Steven Rowe and Lilja Solnes and Martin Pomper},
  title = {Assessment of AI to quantitatively assess disease in PET/CT as compared to a human observer},
  volume = {61},
  number = {supplement 1},
  pages = {508--508},
  year = {2020},
  publisher = {Society of Nuclear Medicine},
  abstract = {Objectives: To further evaluate the performance of a convolutional neural network (CNN) trained to identify breast CA in 18F-FDG PET/CT studies, and to compare the quantitative metrics automatically generated by the CNN with those of a human observer. The ultimate performance of an artificial intelligence driven image analysis system will lie not only in its ability to identify areas of disease, but also in its ability to quantify those regions accurately. Methods: This work builds upon previously reported work (1, 2) in which we used 53 baseline PET/CT studies from an IRB-approved multi-institutional clinical trial of breast CA to train a CNN to automatically identify and label FDG-avid tissue into one of 13 tissue classes. These included 10 classes of tissue expressing normal FDG avidity, e.g., brain, kidney, bladder, etc., and 3 classes of disease tissue, i.e., primary lesion, lymphadenopathy, and other metastasis. In addition to these classes, the system was also trained to identify air, nominal activity, and reference activity (within the liver). Using this trained network, we then processed 67 baseline PET/CT studies taken from an entirely different IRB-approved multi-institutional clinical trial of breast CA to generate voxel-based tissue classification maps. These maps were then transformed into volumes-of-interest, which were used to take quantitative measurements of disease from the images, including MAX-SUV and PEAK-SUV. Results: Sixty-seven baseline PET/CT studies were processed using the previously trained CNN to detect areas of breast CA. Manual disease localization and quantification by a trained reader identified lesions in 60 of the 67 studies, where lesion avidity could be threshold-defined using the mean + 2 SDs of a reference region measured in the liver, whereas the CNN identified localized disease in 64 of the 67 studies. Of the 60 studies identified by manual review, the CNN identified the identical lesion and measured the same MAX-SUV within a tolerance of +/- 5\% in 55 (92\%) of the cases, and within a tolerance of +/- 10\% in 56 (93\%) of the cases. For these same studies, the identical PEAK-SUV measurement was made within +/- 5\% of the manually derived value in 56 (93\%) of the cases, and within +/- 10\% of the manually measured value in 58 (97\%) of the cases. Of the 7 cases in which the manual observer was unable to threshold-define a measurable lesion, the CNN was able to accurately identify and measure 4 lesions; in 2 it misclassified tissue, and in 1 it failed to identify any disease. Conclusions: Systems based on artificial intelligence are beginning to find their way, through a variety of avenues, into the practice of molecular imaging, as well as medical imaging as a whole. These systems promise increased efficiency and accuracy in the interpretation and evaluation of medical images. To fulfill these promises, careful and thoughtful analysis of the performance of these networks is needed to help guide future developments, as well as to assess where their true value lies as an assistant to the molecular imaging practitioner.},
  issn = {0161-5505},
  URL = {https://jnm.snmjournals.org/content/61/supplement_1/508},
  eprint = {https://jnm.snmjournals.org/content},
  journal = {Journal of Nuclear Medicine}
}
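A minimal formalization of the quantitative comparison described in the abstract may help when citing this work; the symbols below are illustrative and not taken from the paper. The lesion-avidity threshold is derived from a liver reference region as
\[
  T_{\mathrm{liver}} = \mu_{\mathrm{liver}} + 2\,\sigma_{\mathrm{liver}},
\]
and, assuming the stated tolerances are taken relative to the manually derived value, a CNN measurement counts as agreeing with the manual measurement at tolerance $\tau \in \{0.05, 0.10\}$ when
\[
  \frac{\lvert \mathrm{SUV}_{\mathrm{CNN}} - \mathrm{SUV}_{\mathrm{manual}} \rvert}{\mathrm{SUV}_{\mathrm{manual}}} \le \tau .
\]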