diff --git a/paper/20_related_work.tex b/paper/20_related_work.tex index 54042b84a17d567957421ea113867b7632b02c2d..2d0c4da181cacb47bfc0512f5e31c4595cf087d1 100644 --- a/paper/20_related_work.tex +++ b/paper/20_related_work.tex @@ -32,7 +32,7 @@ linear combination of both states. \subsection{Word Embeddings} Distributional semantic models (DSMs) have been researched for decades in NLP \cite{turney_frequency_2010}. Based on a huge amount of unlabeled texts, DSMs aim to represent words using a real-valued vector (also called embedding) which captures syntactic and semantic similarities between the words. -Starting with the publication of the work from Collobert et al. \cite{collobert_natural_2011} in 2011, learning embeddings for linguistic units, such as words, sentences or paragraphs, is one of the hot topics in NLP and a plethora of approaches have been proposed \cite{bojanowski_enriching_2017,mikolov_distributed_2013,peters_deep_2018,pennington_glove_2014}. +Starting with the publication of the work from Collobert et al. \cite{collobert_natural_2011} in 2011, learning embeddings for linguistic units, such as words, sentences or paragraphs, is one of the hot topics in NLP and a plethora of approaches have been proposed \cite{bojanowski_enriching_2017,mikolov_distributed_2013,pennington_glove_2014,peters_deep_2018}. The majority of todays embedding models are based on deep learning models trained to perform some kind of language modeling task \cite{peters_semi-supervised_2017,peters_deep_2018,pinter_mimicking_2017}. The most popular embedding model is the Word2Vec model introduced by Mikolov et al. \cite{mikolov_efficient_2013,mikolov_distributed_2013}. diff --git a/paper/references.bib b/paper/references.bib deleted file mode 100644 index 4defc4fc55088aee1df69b9cb9b86d8bd4e44d46..0000000000000000000000000000000000000000 --- a/paper/references.bib +++ /dev/null @@ -1,518 +0,0 @@ - -@inproceedings{peters_deep_2018, - title = {Deep contextualized word representations}, - abstract = {We introduce a new type of deep contextualized word representation that models both (1) complex characteristics of word use (e.g., syntax and semantics), and (2) how these uses vary across...}, - urldate = {2018-02-16}, - booktitle = {The 16th {Annual} {Conference} of the {North} {American} {Chapter} of the {Association} for {Computational} {Linguistics}}, - author = {Peters, Matthew E. and Neumann, Mark and Iyyer, Mohit and Gardner, Matt and Clark, Christopher and Lee, Kenton and Zettlemoyer, Luke}, - year = {2018}, - keywords = {Context Embeedings, Document Classification, Embeddings, Read}, - file = {arXiv\:1802.05365 PDF:/Users/mario/Zotero/storage/89C2DP8R/Peters et al. - 2018 - Deep contextualized word representations.pdf:application/pdf;arXiv.org Snapshot:/Users/mario/Zotero/storage/YF7GZNUI/1802.html:text/html;Full Text PDF:/Users/mario/Zotero/storage/2SWMPWEA/Peters et al. - 2018 - Deep contextualized word representations.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/9X2UN33P/forum.html:text/html} -} - -@inproceedings{neveol_clef_2017, - title = {{CLEF} {eHealth} 2017 {Multilingual} {Information} {Extraction} task overview: {ICD}10 coding of death certificates in {English} and {French}}, - shorttitle = {{CLEF} {eHealth} 2017 {Multilingual} {Information} {Extraction} task overview}, - booktitle = {{CLEF} 2017 {Evaluation} {Labs} and {Workshop}: {Online} {Working} {Notes}, {CEUR}-{WS}}, - author = {Névéol, Aurélie and Anderson, Robert N. and Cohen, K. Bretonnel and Grouin, Cyril and Lavergne, Thomas and Rey, Grégoire and Robert, Aude and Rondet, Claire and Zweigenbaum, Pierre}, - year = {2017}, - keywords = {Read}, - pages = {17}, - file = {Fulltext:/Users/mario/Zotero/storage/8QATUX6Q/Névéol et al. - 2017 - CLEF eHealth 2017 Multilingual Information Extract.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/EV2SLCV8/Névéol et al. - 2017 - CLEF eHealth 2017 Multilingual Information Extract.pdf:application/pdf} -} - -@inproceedings{miftakhutdinov_kfu_2017, - title = {Kfu at clef ehealth 2017 task 1: {Icd}-10 coding of english death certificates with recurrent neural networks}, - booktitle = {{CLEF} 2017 {Online} {Working} {Notes}}, - publisher = {CEUR-WS}, - author = {Miftahutdinov, Zulfat and Tutubalina, Elena}, - year = {2017}, - keywords = {Read, CLEF, ICD-10-Classification}, - file = {Fulltext:/Users/mario/Zotero/storage/HRZ6Q8Q6/Miftakhutdinov und Tutubalina - 2017 - Kfu at clef ehealth 2017 task 1 Icd-10 coding of .pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/J8TXTUNT/Miftakhutdinov und Tutubalina - 2017 - Kfu at clef ehealth 2017 task 1 Icd-10 coding of .pdf:application/pdf} -} - -@inproceedings{goeuriot_clef_2017, - title = {Clef 2017 ehealth evaluation lab overview}, - booktitle = {International {Conference} of the {Cross}-{Language} {Evaluation} {Forum} for {European} {Languages}}, - publisher = {Springer}, - author = {Goeuriot, Lorraine and Kelly, Liadh and Suominen, Hanna and Névéol, Aurélie and Robert, Aude and Kanoulas, Evangelos and Spijker, Rene and Palotti, Joao and Zuccon, Guido}, - year = {2017}, - keywords = {Read}, - pages = {291--303}, - file = {Fulltext:/Users/mario/Zotero/storage/EEAVXG89/Goeuriot et al. - 2017 - Clef 2017 ehealth evaluation lab overview.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/TMNBJ6YC/978-3-319-65813-1_26.html:text/html} -} - -@inproceedings{mikolov_distributed_2013, - title = {Distributed representations of words and phrases and their compositionality}, - booktitle = {Advances in neural information processing systems}, - author = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg S. and Dean, Jeff}, - year = {2013}, - keywords = {Embeddings, Read, Word Embeddings}, - pages = {3111--3119}, - file = {Fulltext:/Users/mario/Zotero/storage/Y7PKTLQX/Mikolov et al. - 2013 - Distributed representations of words and phrases a.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/8B9JIEQG/5021-distributed-representations-of-words-andphrases.html:text/html} -} - -@article{mikolov_efficient_2013, - title = {Efficient estimation of word representations in vector space}, - journal = {arXiv preprint arXiv:1301.3781}, - author = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey}, - year = {2013}, - keywords = {Embeddings, Read, Word Embeddings}, - file = {Fulltext:/Users/mario/Zotero/storage/494A5KSG/Mikolov et al. - 2013 - Efficient estimation of word representations in ve.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/84YYF44Z/1301.html:text/html} -} - -@inproceedings{pennington_glove_2014, - title = {Glove: {Global} vectors for word representation}, - shorttitle = {Glove}, - booktitle = {Proceedings of the 2014 conference on empirical methods in natural language processing ({EMNLP})}, - author = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher}, - year = {2014}, - keywords = {Read, Word Embeddings}, - pages = {1532--1543}, - file = {Fulltext:/Users/mario/Zotero/storage/24PDQ7AG/Pennington et al. - 2014 - Glove Global vectors for word representation.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/IX997EFR/Pennington et al. - 2014 - Glove Global vectors for word representation.pdf:application/pdf} -} - -@article{butt_classification_2013, - title = {Classification of cancer-related death certificates using machine learning}, - volume = {6}, - issn = {1836-1935}, - url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3674421/}, - doi = {10.4066/AMJ.2013.1654}, - abstract = {Background -Cancer monitoring and prevention relies on the critical aspect of timely notification of cancer cases. However, the abstraction and classification of cancer from the free-text of pathology reports and other relevant documents, such as death certificates, exist as complex and time-consuming activities. - -Aims -In this paper, approaches for the automatic detection of notifiable cancer cases as the cause of death from free-text death certificates supplied to Cancer Registries are investigated. - -Method -A number of machine learning classifiers were studied. Features were extracted using natural language techniques and the Medtex toolkit. The numerous features encompassed stemmed words, bi-grams, and concepts from the SNOMED CT medical terminology. The baseline consisted of a keyword spotter using keywords extracted from the long description of ICD-10 cancer related codes. - -Results -Death certificates with notifiable cancer listed as the cause of death can be effectively identified with the methods studied in this paper. A Support Vector Machine (SVM) classifier achieved best performance with an overall Fmeasure of 0.9866 when evaluated on a set of 5,000 freetext death certificates using the token stem feature set. The SNOMED CT concept plus token stem feature set reached the lowest variance (0.0032) and false negative rate (0.0297) while achieving an F-measure of 0.9864. The SVM classifier accounts for the first 18 of the top 40 evaluated runs, and entails the most robust classifier with a variance of 0.001141, half the variance of the other classifiers. - -Conclusion -The selection of features significantly produced the most influences on the performance of the classifiers, although the type of classifier employed also affects performance. In contrast, the feature weighting schema created a negligible effect on performance. Specifically, it is found that stemmed tokens with or without SNOMED CT concepts create the most effective feature when combined with an SVM classifier.}, - number = {5}, - urldate = {2018-03-16}, - journal = {The Australasian Medical Journal}, - author = {Butt, Luke and Zuccon, Guido and Nguyen, Anthony and Bergheim, Anton and Grayson, Narelle}, - month = may, - year = {2013}, - pmid = {23745151}, - pmcid = {PMC3674421}, - pages = {292--299}, - file = {PubMed Central Full Text PDF:/Users/mario/Zotero/storage/ZCUHSCHR/Butt et al. - 2013 - Classification of cancer-related death certificate.pdf:application/pdf} -} - -@article{koopman_automatic_2015, - title = {Automatic {ICD}-10 classification of cancers from free-text death certificates}, - volume = {84}, - issn = {1386-5056}, - url = {http://www.sciencedirect.com/science/article/pii/S1386505615300289}, - doi = {10.1016/j.ijmedinf.2015.08.004}, - abstract = {Objective -Death certificates provide an invaluable source for cancer mortality statistics; however, this value can only be realised if accurate, quantitative data can be extracted from certificates – an aim hampered by both the volume and variable nature of certificates written in natural language. This paper proposes an automatic classification system for identifying cancer related causes of death from death certificates. -Methods -Detailed features, including terms, n-grams and SNOMED CT concepts were extracted from a collection of 447,336 death certificates. These features were used to train Support Vector Machine classifiers (one classifier for each cancer type). The classifiers were deployed in a cascaded architecture: the first level identified the presence of cancer (i.e., binary cancer/nocancer) and the second level identified the type of cancer (according to the ICD-10 classification system). A held-out test set was used to evaluate the effectiveness of the classifiers according to precision, recall and F-measure. In addition, detailed feature analysis was performed to reveal the characteristics of a successful cancer classification model. -Results -The system was highly effective at identifying cancer as the underlying cause of death (F-measure 0.94). The system was also effective at determining the type of cancer for common cancers (F-measure 0.7). Rare cancers, for which there was little training data, were difficult to classify accurately (F-measure 0.12). Factors influencing performance were the amount of training data and certain ambiguous cancers (e.g., those in the stomach region). The feature analysis revealed a combination of features were important for cancer type classification, with SNOMED CT concept and oncology specific morphology features proving the most valuable. -Conclusion -The system proposed in this study provides automatic identification and characterisation of cancers from large collections of free-text death certificates. This allows organisations such as Cancer Registries to monitor and report on cancer mortality in a timely and accurate manner. In addition, the methods and findings are generally applicable beyond cancer classification and to other sources of medical text besides death certificates.}, - number = {11}, - urldate = {2018-03-16}, - journal = {International Journal of Medical Informatics}, - author = {Koopman, Bevan and Zuccon, Guido and Nguyen, Anthony and Bergheim, Anton and Grayson, Narelle}, - month = nov, - year = {2015}, - pages = {956--965}, - file = {ScienceDirect Full Text PDF:/Users/mario/Zotero/storage/P8HLCZWK/Koopman et al. - 2015 - Automatic ICD-10 classification of cancers from fr.pdf:application/pdf;ScienceDirect Snapshot:/Users/mario/Zotero/storage/X3AKYDDI/S1386505615300289.html:text/html} -} - -@article{turney_frequency_2010, - title = {From frequency to meaning: {Vector} space models of semantics}, - volume = {37}, - shorttitle = {From frequency to meaning}, - journal = {Journal of artificial intelligence research}, - author = {Turney, Peter D. and Pantel, Patrick}, - year = {2010}, - keywords = {Unread, Word Embeddings}, - pages = {141--188}, - file = {Snapshot:/Users/mario/Zotero/storage/9H8ZCIME/jair.html:text/html;Turney und Pantel - 2010 - From frequency to meaning Vector space models of .pdf:/Users/mario/Zotero/storage/8SPBC8M2/Turney und Pantel - 2010 - From frequency to meaning Vector space models of .pdf:application/pdf} -} - -@article{collobert_natural_2011, - title = {Natural language processing (almost) from scratch}, - volume = {12}, - number = {Aug}, - journal = {Journal of Machine Learning Research}, - author = {Collobert, Ronan and Weston, Jason and Bottou, Léon and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel}, - year = {2011}, - keywords = {Unread, Word Embeddings}, - pages = {2493--2537}, - file = {Fulltext:/Users/mario/Zotero/storage/H9VZDLXY/Collobert et al. - 2011 - Natural language processing (almost) from scratch.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/NATRPDG4/collobert11a.html:text/html} -} - -@inproceedings{sutskever_sequence_2014, - title = {Sequence to sequence learning with neural networks}, - booktitle = {Advances in neural information processing systems}, - author = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V.}, - year = {2014}, - pages = {3104--3112}, - file = {Fulltext:/Users/mario/Zotero/storage/DVEL74Y2/Sutskever et al. - 2014 - Sequence to sequence learning with neural networks.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/WLQ9DD95/5346-sequence-to-sequence-learning-with-neural.html:text/html} -} - -@book{hochreiter_gradient_2001, - title = {Gradient flow in recurrent nets: the difficulty of learning long-term dependencies}, - shorttitle = {Gradient flow in recurrent nets}, - publisher = {A field guide to dynamical recurrent neural networks. IEEE Press}, - author = {Hochreiter, Sepp and Bengio, Yoshua and Frasconi, Paolo and Schmidhuber, Jürgen}, - year = {2001}, - file = {Fulltext:/Users/mario/Zotero/storage/3UDDZ4LG/Hochreiter et al. - 2001 - Gradient flow in recurrent nets the difficulty of.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/SU2LW7FM/Hochreiter et al. - 2001 - Gradient flow in recurrent nets the difficulty of.pdf:application/pdf} -} - -@inproceedings{bahdanau_neural_2018, - title = {Neural machine translation by jointly learning to align and translate}, - booktitle = {Proceedings of the 6th {International} {Conference} on {Learning} {Representations} ({ICLR} 2018)}, - author = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua}, - year = {2018}, - file = {Fulltext:/Users/mario/Zotero/storage/IS5LGCET/Bahdanau et al. - 2014 - Neural machine translation by jointly learning to .pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/GR2XHEZN/1409.html:text/html} -} - -@incollection{bengio_scheduled_2015, - title = {Scheduled {Sampling} for {Sequence} {Prediction} with {Recurrent} {Neural} {Networks}}, - urldate = {2018-05-18}, - booktitle = {Advances in {Neural} {Information} {Processing} {Systems} 28}, - publisher = {Curran Associates, Inc.}, - author = {Bengio, Samy and Vinyals, Oriol and Jaitly, Navdeep and Shazeer, Noam}, - year = {2015}, - pages = {1171--1179}, - file = {NIPS Full Text PDF:/Users/mario/Zotero/storage/D2B4JCFG/Bengio et al. - 2015 - Scheduled Sampling for Sequence Prediction with Re.pdf:application/pdf;NIPS Snapshort:/Users/mario/Zotero/storage/VDKFT7GD/5956-scheduled-sampling-for-sequence-prediction-with-recurrent-neural-networks.html:text/html} -} - -@inproceedings{lample_neural_2016, - title = {Neural {Architectures} for {Named} {Entity} {Recognition}}, - booktitle = {Proceedings of the 15th {Annual} {Conference} of the {North} {American} {Chapter} of the {Association} for {Computational} {Linguistics}: {Human} {Language} {Technologies}}, - author = {Lample, Guillaume and Ballesteros, Miguel and Subramanian, Sandeep and Kawakami, Kazuya and Dyer, Chris}, - year = {2016}, - pages = {260--270}, - file = {Fulltext:/Users/mario/Zotero/storage/X563FD8L/Lample et al. - 2016 - Neural Architectures for Named Entity Recognition.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/53KXXTTJ/Lample et al. - 2016 - Neural Architectures for Named Entity Recognition.pdf:application/pdf} -} - -@article{wei_disease_2016, - title = {Disease named entity recognition by combining conditional random fields and bidirectional recurrent neural networks}, - volume = {2016}, - journal = {Database: The Journal of Biological Databases and Curation}, - author = {Wei, Qikang and Chen, Tao and Xu, Ruifeng and He, Yulan and Gui, Lin}, - year = {2016}, - file = {Fulltext:/Users/mario/Zotero/storage/CCKZ2IWM/2630532.html:text/html;Snapshot:/Users/mario/Zotero/storage/KPKNC9SU/2630532.html:text/html} -} - -@article{wang_part--speech_2015, - title = {Part-of-speech tagging with bidirectional long short-term memory recurrent neural network}, - journal = {arXiv preprint arXiv:1510.06168}, - author = {Wang, Peilu and Qian, Yao and Soong, Frank K. and He, Lei and Zhao, Hai}, - year = {2015}, - file = {Fulltext:/Users/mario/Zotero/storage/5GR6JJQC/Wang et al. - 2015 - Part-of-speech tagging with bidirectional long sho.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/F2FE38ZX/1510.html:text/html} -} - -@inproceedings{dyer_transition-based_2015, - title = {Transition-{Based} {Dependency} {Parsing} with {Stack} {Long} {Short}-{Term} {Memory}}, - volume = {1}, - booktitle = {Proceedings of the 53rd {Annual} {Meeting} of the {Association} for {Computational} {Linguistics} and the 7th {International} {Joint} {Conference} on {Natural} {Language} {Processing} ({Volume} 1: {Long} {Papers})}, - author = {Dyer, Chris and Ballesteros, Miguel and Ling, Wang and Matthews, Austin and Smith, Noah A.}, - year = {2015}, - pages = {334--343}, - file = {Fulltext:/Users/mario/Zotero/storage/USKW6L5G/Dyer et al. - 2015 - Transition-Based Dependency Parsing with Stack Lon.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/ZYCTTDQF/Dyer et al. - 2015 - Transition-Based Dependency Parsing with Stack Lon.pdf:application/pdf} -} - -@article{bengio_learning_1994, - title = {Learning long-term dependencies with gradient descent is difficult}, - volume = {5}, - number = {2}, - journal = {IEEE transactions on neural networks}, - author = {Bengio, Yoshua and Simard, Patrice and Frasconi, Paolo}, - year = {1994}, - pages = {157--166}, - file = {Fulltext:/Users/mario/Zotero/storage/NSQD4YZI/Bengio et al. - 1994 - Learning long-term dependencies with gradient desc.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/XM3MMQTY/279181.html:text/html} -} - -@article{hochreiter_long_1997, - title = {Long short-term memory}, - volume = {9}, - number = {8}, - journal = {Neural computation}, - author = {Hochreiter, Sepp and Schmidhuber, Jürgen}, - year = {1997}, - pages = {1735--1780}, - file = {Fulltext:/Users/mario/Zotero/storage/XVFURMYQ/Hochreiter und Schmidhuber - 1997 - Long short-term memory.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/BA5KN5ZW/neco.1997.9.8.html:text/html} -} - -@inproceedings{raffel_feed-forward_2016, - title = {Feed-forward networks with attention can solve some long-term memory problems}, - booktitle = {Workshop {Extended} {Abstracts} of the 4th {International} {Conference} on {Learning} {Representations}}, - author = {Raffel, Colin and Ellis, Daniel PW}, - year = {2016}, - file = {Fulltext:/Users/mario/Zotero/storage/V3UB65AD/Raffel und Ellis - 2015 - Feed-forward networks with attention can solve som.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/66LDNKRG/1512.html:text/html} -} - -@inproceedings{suominen_overview_2018, - series = {Lecture {Notes} in {Computer} {Science} ({LNCS})}, - title = {Overview of the {CLEF} {eHealth} {Evaluation} {Lab} 2018}, - booktitle = {{CLEF} 2018 - 8th {Conference} and {Labs} of the {Evaluation} {Forum}}, - publisher = {Springer}, - author = {Suominen, Hanna and Kelly, Liadh and Goeuriot, Lorraine and Kanoulas, Evangelos and Azzopardi, Leif and Spijker, Rene and Li, Dan and Névéol, Aurélie and Ramadier, Lionel and Robert, Aude and Zuccon, Guido and Palotti, Joao}, - year = {2018} -} - -@inproceedings{neveol_clef_2018, - title = {{CLEF} {eHealth} 2018 {Multilingual} {Information} {Extraction} task {Overview}: {ICD}10 {Coding} of {Death} {Certificates} in {French}, {Hungarian} and {Italian}}, - booktitle = {{CLEF} 2018 {Evaluation} {Labs} and {Workshop}: {Online} {Working} {Notes}}, - publisher = {CEUR-WS}, - author = {Névéol, Aurélie and Robert, Aude and Grippo, F and Morgand, C and Orsi, C and Pelikán, L and Ramadier, Lionel and Rey, Grégoire and Zweigenbaum, Pierre}, - year = {2018}, - month = {September} -} - -@inproceedings{cho_learning_2014, - address = {Doha, Qatar}, - title = {Learning {Phrase} {Representations} using {RNN} {Encoder}–{Decoder} for {Statistical} {Machine} {Translation}}, - urldate = {2018-05-23}, - booktitle = {Proceedings of the 2014 {Conference} on {Empirical} {Methods} in {Natural} {Language} {Processing} ({EMNLP})}, - publisher = {Association for Computational Linguistics}, - author = {Cho, Kyunghyun and van Merrienboer, Bart and Gulcehre, Caglar and Bahdanau, Dzmitry and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua}, - month = {October}, - year = {2014}, - pages = {1724--1734}, - file = {Full Text PDF:/Users/mario/Zotero/storage/4NE9THT8/Cho et al. - 2014 - Learning Phrase Representations using RNN Encoder–.pdf:application/pdf} -} - -@article{neveol_clinical_2016, - title = {Clinical {Information} {Extraction} at the {CLEF} {eHealth} {Evaluation} lab 2016}, - volume = {1609}, - issn = {1613-0073}, - abstract = {This paper reports on Task 2 of the 2016 CLEF eHealth evaluation lab which extended the previous information extraction tasks of ShARe/CLEF eHealth evaluation labs. The task continued with named entity recognition and normalization in French narratives, as offered in CLEF eHealth 2015. Named entity recognition involved ten types of entities including disorders that were defined according to Semantic Groups in the Unified Medical Language System® (UMLS®), which was also used for normalizing the entities. In addition, we introduced a large-scale classification task in French death certificates, which consisted of extracting causes of death as coded in the International Classification of Diseases, tenth revision (ICD10). Participant systems were evaluated against a blind reference standard of 832 titles of scientific articles indexed in MEDLINE, 4 drug monographs published by the European Medicines Agency (EMEA) and 27,850 death certificates using Precision, Recall and F-measure. In total, seven teams participated, including five in the entity recognition and normalization task, and five in the death certificate coding task. Three teams submitted their systems to our newly offered reproducibility track. For entity recognition, the highest performance was achieved on the EMEA corpus, with an overall F-measure of 0.702 for plain entities recognition and 0.529 for normalized entity recognition. For entity normalization, the highest performance was achieved on the MEDLINE corpus, with an overall F-measure of 0.552. For death certificate coding, the highest performance was 0.848 F-measure.}, - urldate = {2018-05-23}, - journal = {CEUR workshop proceedings}, - author = {Névéol, Aurélie and Cohen, K. Bretonnel and Grouin, Cyril and Hamon, Thierry and Lavergne, Thomas and Kelly, Liadh and Goeuriot, Lorraine and Rey, Grégoire and Robert, Aude and Tannier, Xavier and Zweigenbaum, Pierre}, - month = {September}, - year = {2016}, - pmid = {29308065}, - pmcid = {PMC5756095}, - pages = {28--42}, - file = {PubMed Central Full Text PDF:/Users/mario/Zotero/storage/ZWWRZSZK/Névéol et al. - 2016 - Clinical Information Extraction at the CLEF eHealt.pdf:application/pdf} -} - -@inproceedings{di_nunzio_lexicon_2017, - title = {A {Lexicon} {Based} {Approach} to {Classification} of {ICD}10 {Codes}. {IMS} {Unipd} at {CLEF} {eHealth} {Task}}, - booktitle = {{CLEF} 2017 {Online} {Working} {Notes}}, - publisher = {CEUR-WS}, - author = {Di Nunzio, Giorgio Maria and Beghini, Federica and Vezzani, Federica and Henrot, Genevieve}, - year = {2017}, - file = {Fulltext:/Users/mario/Zotero/storage/HGHINDH3/Di Nunzio et al. - A Lexicon Based Approach to Classification of ICD1.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/LWSDB84Q/Di Nunzio et al. - A Lexicon Based Approach to Classification of ICD1.pdf:application/pdf} -} - -@inproceedings{cabot_sibm_2016, - title = {{SIBM} at {CLEF} {eHealth} {Evaluation} {Lab} 2016: {Extracting} {Concepts} in {French} {Medical} {Texts} with {ECMT} and {CIMIND}}, - booktitle = {{CLEF} 2015 {Online} {Working} {Notes}}, - publisher = {CEUR-WS}, - author = {Cabot, Chloé and Soualmia, Lina F. and Dahamna, Badisse and Darmoni, Stéfan J.}, - year = {2016}, - file = {Fulltext:/Users/mario/Zotero/storage/E4ZADEMU/Cabot et al. - SIBM at CLEF eHealth Evaluation Lab 2016 Extracti.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/C9ADYETR/Cabot et al. - SIBM at CLEF eHealth Evaluation Lab 2016 Extracti.pdf:application/pdf} -} - -@inproceedings{van_mulligen_erasmus_2016, - title = {Erasmus {MC} at {CLEF} {eHealth} 2016: {Concept} {Recognition} and {Coding} in {French} {Texts}}, - shorttitle = {Erasmus {MC} at {CLEF} {eHealth} 2016}, - booktitle = {{CLEF} 2016 {Online} {Working} {Notes}}, - publisher = {CEUR-WS}, - author = {van Mulligen, Erik M. and Afzal, Zubair and Akhondi, Saber A. and Vo, Dang and Kors, Jan A.}, - year = {2016}, - file = {Fulltext:/Users/mario/Zotero/storage/AT3LSRP4/van Mulligen et al. - Erasmus MC at CLEF eHealth 2016 Concept Recogniti.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/5NNCIN2V/van Mulligen et al. - Erasmus MC at CLEF eHealth 2016 Concept Recogniti.pdf:application/pdf} -} - -@inproceedings{mottin_bitem_2016, - title = {{BiTeM} at {CLEF} {eHealth} {Evaluation} {Lab} 2016 {Task} 2: {Multilingual} {Information} {Extraction}.}, - booktitle = {{CLEF} 2016 {Online} {Working} {Notes}}, - publisher = {CEUR-WS}, - author = {Mottin, Luc and Gobeill, Julien and Mottaz, Anaïs and Pasche, Emilie and Gaudinat, Arnaud and Ruch, Patrick}, - year = {2016}, - file = {Fulltext:/Users/mario/Zotero/storage/LF9UGCQZ/Mottin et al. - 2016 - BiTeM at CLEF eHealth Evaluation Lab 2016 Task 2 .pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/BRRDY3TV/Mottin et al. - 2016 - BiTeM at CLEF eHealth Evaluation Lab 2016 Task 2 .pdf:application/pdf} -} - -@inproceedings{jonnagaddala_automatic_2017, - title = {Automatic coding of death certificates to {ICD}-10 terminology}, - booktitle = {{CLEF} 2017 {Online} {Working} {Notes}}, - publisher = {CEUR-WS}, - author = {Jonnagaddala, Jitendra and Hu, Feiyan}, - year = {2017}, - file = {Fulltext:/Users/mario/Zotero/storage/AW2YGWHC/Jonnagaddala und Hu - Automatic coding of death certificates to ICD-10 t.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/VHWNWWPC/Jonnagaddala und Hu - Automatic coding of death certificates to ICD-10 t.pdf:application/pdf} -} - -@inproceedings{ho-dac_litl_2017, - title = {{LITL} at {CLEF} {eHealth}2017: automatic classification of death reports}, - booktitle = {{CLEF} 2017 {Online} {Working} {Notes}}, - publisher = {CEUR-WS}, - author = {Ho-Dac, Lydia-Mai and Fabre, Cécile and Birski, Anouk and Boudraa, Imane and Bourriot, Aline and Cassier, Manon and Delvenne, Léa and Garcia-Gonzalez, Charline and Kang, Eun-Bee and Piccinini, Elisa}, - year = {2017}, - file = {Fulltext:/Users/mario/Zotero/storage/N2Q47RVL/Ho-Dac et al. - LITL at CLEF eHealth2017 automatic classification.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/D5T3NUAR/Ho-Dac et al. - LITL at CLEF eHealth2017 automatic classification.pdf:application/pdf} -} - -@inproceedings{ho-dac_litl_2016, - title = {{LITL} at {CLEF} {eHealth}2016: recognizing entities in {French} biomedical documents}, - booktitle = {{CLEF} 2016 {Online} {Working} {Notes}}, - publisher = {CEUR-WS}, - author = {Ho-Dac, Lydia-Mai and Tanguy, Ludovic and Grauby, Céline and Mby, Aurore Heu and Malosse, Justine and Rivière, Laura and Veltz-Mauclair, Amélie}, - year = {2016}, - file = {Fulltext:/Users/mario/Zotero/storage/9YCE3EVM/Ho-Dac et al. - LITL at CLEF eHealth2016 recognizing entities in .pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/I6YEA4ZT/Ho-Dac et al. - LITL at CLEF eHealth2016 recognizing entities in .pdf:application/pdf} -} - -@inproceedings{dermouche_ecstra-inserm_2016, - title = {{ECSTRA}-{INSERM}@ {CLEF} {eHealth}2016-task 2: {ICD}10 {Code} {Extraction} from {Death} {Certificates}}, - booktitle = {{CLEF} 2016 {Online} {Working} {Notes}}, - author = {Dermouche, Mohamed and Looten, Vincent and Flicoteaux, Rémi and Chevret, Sylvie and Velcin, Julien and Taright, Namik}, - year = {2016}, - file = {Fulltext:/Users/mario/Zotero/storage/WVDFQFEK/Dermouche et al. - ECSTRA-INSERM@ CLEF eHealth2016-task 2 ICD10 Code.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/RIMPEW8L/Dermouche et al. - ECSTRA-INSERM@ CLEF eHealth2016-task 2 ICD10 Code.pdf:application/pdf} -} - -@inproceedings{ebersbach_fusion_2017, - title = {Fusion {Methods} for {ICD}10 {Code} {Classification} of {Death} {Certificates} in {Multilingual} {Corpora}}, - booktitle = {{CLEF} 2017 {Online} {Working} {Notes}}, - publisher = {CEUR-WS}, - author = {Ebersbach, Mike and Herms, Robert and Eibl, Maximilian}, - year = {2017}, - file = {Fulltext:/Users/mario/Zotero/storage/LKIZA2P4/Ebersbach et al. - 2017 - Fusion Methods for ICD10 Code Classification of De.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/CIX48RIC/Ebersbach et al. - 2017 - Fusion Methods for ICD10 Code Classification of De.pdf:application/pdf} -} - -@inproceedings{xu_show_2015, - title = {Show, attend and tell: {Neural} image caption generation with visual attention}, - shorttitle = {Show, attend and tell}, - booktitle = {International {Conference} on {Machine} {Learning}}, - author = {Xu, Kelvin and Ba, Jimmy and Kiros, Ryan and Cho, Kyunghyun and Courville, Aaron and Salakhudinov, Ruslan and Zemel, Rich and Bengio, Yoshua}, - year = {2015}, - pages = {2048--2057}, - file = {Fulltext:/Users/mario/Zotero/storage/QASCM4G3/Xu et al. - 2015 - Show, attend and tell Neural image caption genera.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/VILIPKYC/Xu et al. - 2015 - Show, attend and tell Neural image caption genera.pdf:application/pdf} -} - -@inproceedings{chan_listen_2016, - title = {Listen, attend and spell: {A} neural network for large vocabulary conversational speech recognition}, - shorttitle = {Listen, attend and spell}, - booktitle = {Acoustics, {Speech} and {Signal} {Processing} ({ICASSP}), 2016 {IEEE} {International} {Conference} on}, - publisher = {IEEE}, - author = {Chan, William and Jaitly, Navdeep and Le, Quoc and Vinyals, Oriol}, - year = {2016}, - pages = {4960--4964}, - file = {Fulltext:/Users/mario/Zotero/storage/ZV5B2GQJ/Chan et al. - 2016 - Listen, attend and spell A neural network for lar.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/RS8MBCM8/7472621.html:text/html} -} - -@inproceedings{vinyals_show_2015, - title = {Show and tell: {A} neural image caption generator}, - shorttitle = {Show and tell}, - booktitle = {Computer {Vision} and {Pattern} {Recognition} ({CVPR}), 2015 {IEEE} {Conference} on}, - publisher = {IEEE}, - author = {Vinyals, Oriol and Toshev, Alexander and Bengio, Samy and Erhan, Dumitru}, - year = {2015}, - pages = {3156--3164}, - file = {Fulltext:/Users/mario/Zotero/storage/YYYDMHJD/Vinyals et al. - 2015 - Show and tell A neural image caption generator.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/XQBCTX6S/Vinyals et al. - 2015 - Show and tell A neural image caption generator.pdf:application/pdf} -} - -@inproceedings{faruqui_improving_2014, - title = {Improving vector space word representations using multilingual correlation}, - booktitle = {Proceedings of the 14th {Conference} of the {European} {Chapter} of the {Association} for {Computational} {Linguistics}}, - author = {Faruqui, Manaal and Dyer, Chris}, - year = {2014}, - pages = {462--471}, - file = {Fulltext:/Users/mario/Zotero/storage/8X3ZWRRV/Faruqui und Dyer - 2014 - Improving vector space word representations using .pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/9TNA35WS/Faruqui und Dyer - 2014 - Improving vector space word representations using .pdf:application/pdf} -} - -@inproceedings{vyas_sparse_2016, - title = {Sparse bilingual word representations for cross-lingual lexical entailment}, - booktitle = {Proceedings of the 2016 {Conference} of the {North} {American} {Chapter} of the {Association} for {Computational} {Linguistics}: {Human} {Language} {Technologies}}, - author = {Vyas, Yogarshi and Carpuat, Marine}, - year = {2016}, - pages = {1187--1197}, - file = {Fulltext:/Users/mario/Zotero/storage/WK4I4EGG/Vyas und Carpuat - 2016 - Sparse bilingual word representations for cross-li.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/A9SJS6LH/Vyas und Carpuat - 2016 - Sparse bilingual word representations for cross-li.pdf:application/pdf} -} - -@inproceedings{pham_learning_2015, - title = {Learning distributed representations for multilingual text sequences}, - booktitle = {Proceedings of the 1st {Workshop} on {Vector} {Space} {Modeling} for {Natural} {Language} {Processing}}, - author = {Pham, Hieu and Luong, Thang and Manning, Christopher}, - year = {2015}, - pages = {88--94}, - file = {Fulltext:/Users/mario/Zotero/storage/CF3JNTCK/Pham et al. - 2015 - Learning distributed representations for multiling.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/8STRAYTS/Pham et al. - 2015 - Learning distributed representations for multiling.pdf:application/pdf} -} - -@article{vulic_bilingual_2016, - title = {Bilingual distributed word representations from document-aligned comparable data}, - volume = {55}, - journal = {Journal of Artificial Intelligence Research}, - author = {Vulić, Ivan and Moens, Marie-Francine}, - year = {2016}, - pages = {953--994}, - file = {Fulltext:/Users/mario/Zotero/storage/DF26LCTD/Vulić und Moens - 2016 - Bilingual distributed word representations from do.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/UN3RUE5Y/10997.html:text/html} -} - -@inproceedings{xing_normalized_2015, - title = {Normalized word embedding and orthogonal transform for bilingual word translation}, - booktitle = {Proceedings of the 2015 {Conference} of the {North} {American} {Chapter} of the {Association} for {Computational} {Linguistics}: {Human} {Language} {Technologies}}, - author = {Xing, Chao and Wang, Dong and Liu, Chao and Lin, Yiye}, - year = {2015}, - pages = {1006--1011}, - file = {Fulltext:/Users/mario/Zotero/storage/W6XGG8LG/Xing et al. - 2015 - Normalized word embedding and orthogonal transform.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/TKYHHWZZ/Xing et al. - 2015 - Normalized word embedding and orthogonal transform.pdf:application/pdf} -} - -@inproceedings{guo_cross-lingual_2015, - title = {Cross-lingual dependency parsing based on distributed representations}, - volume = {1}, - booktitle = {Proceedings of the 53rd {Annual} {Meeting} of the {Association} for {Computational} {Linguistics} and the 7th {International} {Joint} {Conference} on {Natural} {Language} {Processing} ({Volume} 1: {Long} {Papers})}, - author = {Guo, Jiang and Che, Wanxiang and Yarowsky, David and Wang, Haifeng and Liu, Ting}, - year = {2015}, - pages = {1234--1244}, - file = {Fulltext:/Users/mario/Zotero/storage/D6Q9WBXY/Guo et al. - 2015 - Cross-lingual dependency parsing based on distribu.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/K8NNEQ6H/Guo et al. - 2015 - Cross-lingual dependency parsing based on distribu.pdf:application/pdf} -} - -@inproceedings{sogaard_inverted_2015, - title = {Inverted indexing for cross-lingual {NLP}}, - booktitle = {The 53rd {Annual} {Meeting} of the {Association} for {Computational} {Linguistics} and the 7th {International} {Joint} {Conference} of the {Asian} {Federation} of {Natural} {Language} {Processing} ({ACL}-{IJCNLP} 2015)}, - author = {Søgaard, Anders and Agić, Željko and Alonso, Héctor MartÃnez and Plank, Barbara and Bohnet, Bernd and Johannsen, Anders}, - year = {2015}, - file = {Fulltext:/Users/mario/Zotero/storage/UZN66Q7M/Søgaard et al. - 2015 - Inverted indexing for cross-lingual NLP.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/26MECM8N/Søgaard et al. - 2015 - Inverted indexing for cross-lingual NLP.pdf:application/pdf} -} - -@article{bojanowski_enriching_2017, - title = {Enriching {Word} {Vectors} with {Subword} {Information}}, - volume = {5}, - number = {1}, - journal = {Transactions of the Association of Computational Linguistics}, - author = {Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas}, - year = {2017}, - pages = {135--146}, - file = {Fulltext:/Users/mario/Zotero/storage/ZMHFQUNA/Bojanowski et al. - 2017 - Enriching Word Vectors with Subword Information.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/USMIEAEL/Bojanowski et al. - 2017 - Enriching Word Vectors with Subword Information.pdf:application/pdf} -} - -@inproceedings{kingma_adam:_2014, - title = {Adam: {A} method for stochastic optimization}, - booktitle = {Proceedings of the 3rd {International} {Conference} on {Learning} {Representations} ({ICLR})}, - author = {Kingma, Diederik P. and Ba, Jimmy}, - year = {2014}, - file = {Fulltext:/Users/mario/Zotero/storage/A9DC95XN/Kingma und Ba - 2014 - Adam A method for stochastic optimization.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/4CQAFF7H/1412.html:text/html} -} - -@inproceedings{peters_semi-supervised_2017, - title = {Semi-supervised sequence tagging with bidirectional language models}, - volume = {1}, - booktitle = {Proceedings of the 55th {Annual} {Meeting} of the {Association} for {Computational} {Linguistics} ({Volume} 1: {Long} {Papers})}, - author = {Peters, Matthew and Ammar, Waleed and Bhagavatula, Chandra and Power, Russell}, - year = {2017}, - pages = {1756--1765}, - file = {Fulltext:/Users/mario/Zotero/storage/UQYRUUBQ/Peters et al. - 2017 - Semi-supervised sequence tagging with bidirectiona.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/PJ2YN7VR/Peters et al. - 2017 - Semi-supervised sequence tagging with bidirectiona.pdf:application/pdf} -} - -@inproceedings{pinter_mimicking_2017, - title = {Mimicking {Word} {Embeddings} using {Subword} {RNNs}}, - booktitle = {Proceedings of the 2017 {Conference} on {Empirical} {Methods} in {Natural} {Language} {Processing}}, - author = {Pinter, Yuval and Guthrie, Robert and Eisenstein, Jacob}, - year = {2017}, - pages = {102--112}, - file = {Fulltext:/Users/mario/Zotero/storage/QY3T7DCJ/Pinter et al. - 2017 - Mimicking Word Embeddings using Subword RNNs.pdf:application/pdf;Snapshot:/Users/mario/Zotero/storage/MD8TGGLY/Pinter et al. - 2017 - Mimicking Word Embeddings using Subword RNNs.pdf:application/pdf} -} \ No newline at end of file diff --git a/paper/wbi-eclef18.pdf b/paper/wbi-eclef18.pdf new file mode 100644 index 0000000000000000000000000000000000000000..70b1eb844748a78f3a7236fa4280cc5b646f1b68 Binary files /dev/null and b/paper/wbi-eclef18.pdf differ diff --git a/paper/wbi-eclef18.tex b/paper/wbi-eclef18.tex index 075ec72d96c13b60ca7f0ffa11273be45bd66a0b..92e030f0793a0c06d6ebe6a82c0cd17d5f3d828a 100644 --- a/paper/wbi-eclef18.tex +++ b/paper/wbi-eclef18.tex @@ -33,15 +33,15 @@ % an abbreviated paper title here \titlerunning{ICD-10 coding using multi-lingual embeddings and RNNs} -\author{Jurica \v{S}eva\inst{1} \and -Mario Sänger\inst{1} \and -Ulf Leser\inst{1}} +\author{Jurica \v{S}eva \and +Mario Sänger \and +Ulf Leser} % First names are abbreviated in the running head. % If there are more than two authors, 'et al.' is used. \authorrunning{\v{S}eva et al.} -\institute{\inst{1}Humboldt-Universität zu Berlin, Knowledge Management in +\institute{Humboldt-Universität zu Berlin, Knowledge Management in Bioinformatics, \\ Berlin, Germany\\ \email{\{seva,saengema,leser\}@informatik.hu-berlin.de}} % diff --git a/paperwork/Agreement.docx b/paperwork/Agreement.docx new file mode 100644 index 0000000000000000000000000000000000000000..befaeb79e8296955390f76fa1fcc1e9734e46a31 Binary files /dev/null and b/paperwork/Agreement.docx differ