booktitle={{CLEF} 2017 {Evaluation} {Labs} and {Workshop}: {Online} {Working} {Notes}, {CEUR}-{WS}},
author={Névéol, Aurélie and Anderson, Robert N. and Cohen, K. Bretonnel and Grouin, Cyril and Lavergne, Thomas and Rey, Grégoire and Robert, Aude and Rondet, Claire and Zweigenbaum, Pierre},
year={2017},
keywords={Read},
pages={17},
file={Fulltext:/Users/mario/Zotero/storage/8QATUX6Q/Névéol et al. - 2017 - CLEF eHealth 2017 Multilingual Information Extract.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/EV2SLCV8/Névéol et al. - 2017 - CLEF eHealth 2017 Multilingual Information Extract.pdf:application/pdf}
}
@comment{NOTE(review): the entry below carries two author fields, both a booktitle and a journal, a 2013 year under a 2017 key, and abstract text with no field name in front of it. It looks like several records were merged into one; confirm against the reference manager before citing.}
@inproceedings{miftakhutdinov_kfu_2017,
title={Kfu at clef ehealth 2017 task 1: {Icd}-10 coding of english death certificates with recurrent neural networks},
booktitle={International {Conference} of the {Cross}-{Language} {Evaluation} {Forum} for {European} {Languages}},
publisher={Springer},
author={Goeuriot, Lorraine and Kelly, Liadh and Suominen, Hanna and Névéol, Aurélie and Robert, Aude and Kanoulas, Evangelos and Spijker, Rene and Palotti, Joao and Zuccon, Guido},
Cancer monitoring and prevention relies on the critical aspect of timely notification of cancer cases. However, the abstraction and classification of cancer from the free-text of pathology reports and other relevant documents, such as death certificates, exist as complex and time-consuming activities.
Aims
In this paper, approaches for the automatic detection of notifiable cancer cases as the cause of death from free-text death certificates supplied to Cancer Registries are investigated.
Method
A number of machine learning classifiers were studied. Features were extracted using natural language techniques and the Medtex toolkit. The numerous features encompassed stemmed words, bi-grams, and concepts from the SNOMED CT medical terminology. The baseline consisted of a keyword spotter using keywords extracted from the long description of ICD-10 cancer related codes.
Results
Death certificates with notifiable cancer listed as the cause of death can be effectively identified with the methods studied in this paper. A Support Vector Machine (SVM) classifier achieved best performance with an overall Fmeasure of 0.9866 when evaluated on a set of 5,000 freetext death certificates using the token stem feature set. The SNOMED CT concept plus token stem feature set reached the lowest variance (0.0032) and false negative rate (0.0297) while achieving an F-measure of 0.9864. The SVM classifier accounts for the first 18 of the top 40 evaluated runs, and entails the most robust classifier with a variance of 0.001141, half the variance of the other classifiers.
Conclusion
The selection of features significantly produced the most influences on the performance of the classifiers, although the type of classifier employed also affects performance. In contrast, the feature weighting schema created a negligible effect on performance. Specifically, it is found that stemmed tokens with or without SNOMED CT concepts create the most effective feature when combined with an SVM classifier.},
number={5},
urldate={2018-03-16},
journal={The Australasian Medical Journal},
author={Butt, Luke and Zuccon, Guido and Nguyen, Anthony and Bergheim, Anton and Grayson, Narelle},
month=may,
year={2013},
pmid={23745151},
pmcid={PMC3674421},
pages={292--299},
file={PubMed Central Full Text PDF:/Users/mario/Zotero/storage/ZCUHSCHR/Butt et al. - 2013 - Classification of cancer-related death certificate.pdf:application/pdf}
}
@comment{Structured abstract: the later sections are headed Methods, Results and Conclusion, but the first section has no heading here. NOTE(review): presumably the first heading was lost on export; confirm against the publisher page.}
@article{koopman_automatic_2015,
title={Automatic {ICD}-10 classification of cancers from free-text death certificates},
abstract={Death certificates provide an invaluable source for cancer mortality statistics; however, this value can only be realised if accurate, quantitative data can be extracted from certificates – an aim hampered by both the volume and variable nature of certificates written in natural language. This paper proposes an automatic classification system for identifying cancer related causes of death from death certificates.
Methods
Detailed features, including terms, n-grams and SNOMED CT concepts were extracted from a collection of 447,336 death certificates. These features were used to train Support Vector Machine classifiers (one classifier for each cancer type). The classifiers were deployed in a cascaded architecture: the first level identified the presence of cancer (i.e., binary cancer/nocancer) and the second level identified the type of cancer (according to the ICD-10 classification system). A held-out test set was used to evaluate the effectiveness of the classifiers according to precision, recall and F-measure. In addition, detailed feature analysis was performed to reveal the characteristics of a successful cancer classification model.
Results
The system was highly effective at identifying cancer as the underlying cause of death (F-measure 0.94). The system was also effective at determining the type of cancer for common cancers (F-measure 0.7). Rare cancers, for which there was little training data, were difficult to classify accurately (F-measure 0.12). Factors influencing performance were the amount of training data and certain ambiguous cancers (e.g., those in the stomach region). The feature analysis revealed a combination of features were important for cancer type classification, with SNOMED CT concept and oncology specific morphology features proving the most valuable.
Conclusion
The system proposed in this study provides automatic identification and characterisation of cancers from large collections of free-text death certificates. This allows organisations such as Cancer Registries to monitor and report on cancer mortality in a timely and accurate manner. In addition, the methods and findings are generally applicable beyond cancer classification and to other sources of medical text besides death certificates.},
number={11},
urldate={2018-03-16},
journal={International Journal of Medical Informatics},
author={Koopman, Bevan and Zuccon, Guido and Nguyen, Anthony and Bergheim, Anton and Grayson, Narelle},
month=nov,
year={2015},
pages={956--965},
file={ScienceDirect Full Text PDF:/Users/mario/Zotero/storage/P8HLCZWK/Koopman et al. - 2015 - Automatic ICD-10 classification of cancers from fr.pdf:application/pdf;ScienceDirect Snapshot:/Users/mario/Zotero/storage/X3AKYDDI/S1386505615300289.html:text/html}
}
@comment{NOTE(review): in the entry below every line appears twice, diff hunk header lines and ellipsis lines sit inside the entry body, there is no author field, and the attached file is named for Cho et al. rather than for the key. This looks like pasted diff output; restore the record from the reference manager before citing.}
@inproceedings{sutskever_sequence_2014,
@inproceedings{sutskever_sequence_2014,
title={Sequence to sequence learning with neural networks},
title={Sequence to sequence learning with neural networks},
booktitle={Advances in neural information processing systems},
booktitle={Advances in neural information processing systems},
...
@@ -132,4 +220,102 @@
...
@@ -132,4 +220,102 @@
year={2014},
year={2014},
pages={1724--1734},
pages={1724--1734},
file={Full Text PDF:/Users/mario/Zotero/storage/4NE9THT8/Cho et al. - 2014 - Learning Phrase Representations using RNN Encoder–.pdf:application/pdf}
file={Full Text PDF:/Users/mario/Zotero/storage/4NE9THT8/Cho et al. - 2014 - Learning Phrase Representations using RNN Encoder–.pdf:application/pdf}
}
@article{neveol_clinical_2016,
  author   = {Névéol, Aurélie and Cohen, K. Bretonnel and Grouin, Cyril and Hamon, Thierry and Lavergne, Thomas and Kelly, Liadh and Goeuriot, Lorraine and Rey, Grégoire and Robert, Aude and Tannier, Xavier and Zweigenbaum, Pierre},
  title    = {Clinical {Information} {Extraction} at the {CLEF} {eHealth} {Evaluation} lab 2016},
  journal  = {CEUR workshop proceedings},
  year     = {2016},
  month    = sep,
  pages    = {28--42},
  abstract = {This paper reports on Task 2 of the 2016 CLEF eHealth evaluation lab which extended the previous information extraction tasks of ShARe/CLEF eHealth evaluation labs. The task continued with named entity recognition and normalization in French narratives, as offered in CLEF eHealth 2015. Named entity recognition involved ten types of entities including disorders that were defined according to Semantic Groups in the Unified Medical Language System® (UMLS®), which was also used for normalizing the entities. In addition, we introduced a large-scale classification task in French death certificates, which consisted of extracting causes of death as coded in the International Classification of Diseases, tenth revision (ICD10). Participant systems were evaluated against a blind reference standard of 832 titles of scientific articles indexed in MEDLINE, 4 drug monographs published by the European Medicines Agency (EMEA) and 27,850 death certificates using Precision, Recall and F-measure. In total, seven teams participated, including five in the entity recognition and normalization task, and five in the death certificate coding task. Three teams submitted their systems to our newly offered reproducibility track. For entity recognition, the highest performance was achieved on the EMEA corpus, with an overall F-measure of 0.702 for plain entities recognition and 0.529 for normalized entity recognition. For entity normalization, the highest performance was achieved on the MEDLINE corpus, with an overall F-measure of 0.552. For death certificate coding, the highest performance was 0.848 F-measure.},
  pmid     = {29308065},
  pmcid    = {PMC5756095},
  urldate  = {2018-05-23},
  file     = {PubMed Central Full Text PDF:/Users/mario/Zotero/storage/ZWWRZSZK/Névéol et al. - 2016 - Clinical Information Extraction at the CLEF eHealt.pdf:application/pdf}
}
@comment{NOTE(review): booktitle below reuses the CLEF 2017 working-notes title already used elsewhere in this file; presumably this paper appeared in the same volume, confirm against the proceedings index.}
@inproceedings{di_nunzio_lexicon_2017,
  author    = {Di Nunzio, Giorgio Maria and Beghini, Federica and Vezzani, Federica and Henrot, Genevieve},
  title     = {A {Lexicon} {Based} {Approach} to {Classification} of {ICD}10 {Codes}. {IMS} {Unipd} at {CLEF} {eHealth} {Task}},
  booktitle = {{CLEF} 2017 {Evaluation} {Labs} and {Workshop}: {Online} {Working} {Notes}, {CEUR}-{WS}},
  year      = {2017},
  file      = {Fulltext:/Users/mario/Zotero/storage/HGHINDH3/Di Nunzio et al. - A Lexicon Based Approach to Classification of ICD1.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/LWSDB84Q/Di Nunzio et al. - A Lexicon Based Approach to Classification of ICD1.pdf:application/pdf}
}
@comment{NOTE(review): the entry below has two file fields, and its author list and attachments are named for van Mulligen et al. and for Jonnagaddala und Hu rather than for the work in the key and title. It looks like several records were merged into one; confirm against the reference manager before citing.}
@inproceedings{cabot_sibm_2016,
title={{SIBM} at {CLEF} {eHealth} {Evaluation} {Lab} 2016: {Extracting} {Concepts} in {French} {Medical} {Texts} with {ECMT} and {CIMIND}},
author={van Mulligen, Erik M. and Afzal, Zubair and Akhondi, Saber A. and Vo, Dang and Kors, Jan A.},
year={2016},
file={Fulltext:/Users/mario/Zotero/storage/AT3LSRP4/van Mulligen et al. - Erasmus MC at CLEF eHealth 2016 Concept Recogniti.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/5NNCIN2V/van Mulligen et al. - Erasmus MC at CLEF eHealth 2016 Concept Recogniti.pdf:application/pdf}
file={Fulltext:/Users/mario/Zotero/storage/AW2YGWHC/Jonnagaddala und Hu - Automatic coding of death certificates to ICD-10 t.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/VHWNWWPC/Jonnagaddala und Hu - Automatic coding of death certificates to ICD-10 t.pdf:application/pdf}
}
@comment{NOTE(review): booktitle below reuses the CLEF 2017 working-notes title already used elsewhere in this file; presumably this paper appeared in the same volume, confirm against the proceedings index.}
@inproceedings{ho-dac_litl_2017,
  author    = {Ho-Dac, Lydia-Mai and Fabre, Cécile and Birski, Anouk and Boudraa, Imane and Bourriot, Aline and Cassier, Manon and Delvenne, Léa and Garcia-Gonzalez, Charline and Kang, Eun-Bee and Piccinini, Elisa},
  title     = {{LITL} at {CLEF} {eHealth}2017: automatic classification of death reports},
  booktitle = {{CLEF} 2017 {Evaluation} {Labs} and {Workshop}: {Online} {Working} {Notes}, {CEUR}-{WS}},
  year      = {2017},
  file      = {Fulltext:/Users/mario/Zotero/storage/N2Q47RVL/Ho-Dac et al. - LITL at CLEF eHealth2017 automatic classification.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/D5T3NUAR/Ho-Dac et al. - LITL at CLEF eHealth2017 automatic classification.pdf:application/pdf}
}
@comment{NOTE(review): booktitle below is the 2016 counterpart of the CLEF 2017 working-notes title used elsewhere in this file; it is an assumption that the 2016 volume is named the same way, confirm against the proceedings index.}
@inproceedings{ho-dac_litl_2016,
  author    = {Ho-Dac, Lydia-Mai and Tanguy, Ludovic and Grauby, Céline and Mby, Aurore Heu and Malosse, Justine and Rivière, Laura and Veltz-Mauclair, Amélie},
  title     = {{LITL} at {CLEF} {eHealth}2016: recognizing entities in {French} biomedical documents},
  booktitle = {{CLEF} 2016 {Evaluation} {Labs} and {Workshop}: {Online} {Working} {Notes}, {CEUR}-{WS}},
  year      = {2016},
  file      = {Fulltext:/Users/mario/Zotero/storage/9YCE3EVM/Ho-Dac et al. - LITL at CLEF eHealth2016 recognizing entities in .pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/I6YEA4ZT/Ho-Dac et al. - LITL at CLEF eHealth2016 recognizing entities in .pdf:application/pdf}
}
@inproceedings{dermouche_ecstra-inserm_2016,
title={{ECSTRA}-{INSERM}@ {CLEF} {eHealth}2016-task 2: {ICD}10 {Code} {Extraction} from {Death} {Certificates}},
author={Ebersbach, Mike and Herms, Robert and Eibl, Maximilian},
year={2017},
file={Fulltext:/Users/mario/Zotero/storage/LKIZA2P4/Ebersbach et al. - 2017 - Fusion Methods for ICD10 Code Classification of De.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/CIX48RIC/Ebersbach et al. - 2017 - Fusion Methods for ICD10 Code Classification of De.pdf:application/pdf}