% This is samplepaper.tex, a sample chapter demonstrating the
% LLNCS macro package for Springer Computer Science proceedings;
% Version 2.20 of 2017/10/04
%
\documentclass[runningheads]{llncs}

% Suppress headers and footers on all pages
% (presumably required by the venue's formatting rules -- confirm).
\pagestyle{empty}

% Input encoding and language.
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}

% Colour support; needed by the \UrlFont redefinition below.
\usepackage{color}

% Table helpers.
\usepackage{multirow,tabularx}
\usepackage{booktabs}

% URLs (line breaks allowed at hyphens) and hyperlinks.
\usepackage[hyphens]{url}
\usepackage{hyperref}

% Fixed-width, vertically centred table columns:
%   L{<width>} -- ragged-right text, C{<width>} -- centred text.
% \arraybackslash restores \\ as the row terminator inside these columns.
\usepackage{array}
\newcolumntype{L}[1]{>{\raggedright\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
\newcolumntype{C}[1]{>{\centering\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}

% Used for displaying a sample figure. If possible, figure files should
% be included in EPS format.
\usepackage{graphicx}

% If you use the hyperref package, please uncomment the following line
% to display URLs in blue roman font according to Springer's eBook style:
\renewcommand\UrlFont{\color{blue}\rmfamily}

\begin{document}
%
\title{WBI at CLEF eHealth 2018 Task 1: Language-independent ICD-10 coding using multi-lingual embeddings and recurrent neural networks}
%
% If the paper title is too long for the running head, you can set
% an abbreviated paper title here
\titlerunning{ICD-10 coding using multi-lingual embeddings and RNNs}
%
\author{Jurica \v{S}eva \and Mario Sänger \and Ulf Leser}
%
% First names are abbreviated in the running head.
% If there are more than two authors, 'et al.' is used.
\authorrunning{\v{S}eva et al.}
%
\institute{Humboldt-Universität zu Berlin, Knowledge Management in Bioinformatics, \\
Berlin, Germany\\
\email{\{seva,saengema,leser\}@informatik.hu-berlin.de}}
%
\maketitle              % typeset the header of the contribution
%
\begin{abstract}
This paper describes the participation of the WBI team in the CLEF eHealth 2018 shared task 1 (``Multilingual Information Extraction - ICD-10 coding'').
Our contribution focuses on the setup and evaluation of a baseline language-independent neural architecture for ICD-10 classification as well as a simple, heuristic multi-language word embedding space.
The approach builds on two recurrent neural network models to extract and classify causes of death from French, Italian and Hungarian death certificates.
First, we employ an LSTM-based sequence-to-sequence model to obtain a death cause from each death certificate line.
We then utilize a bidirectional LSTM model with an attention mechanism to assign the respective ICD-10 codes to the received death cause description.
Both models take multi-language word embeddings as inputs.
During evaluation, our best model achieves an F-score of 0.34 for French, 0.45 for Hungarian and 0.77 for Italian.
The results are encouraging for future work as well as the extension and improvement of the proposed baseline system.

\keywords{ICD-10 coding \and Biomedical information extraction \and Multi-lingual sequence-to-sequence model \and Representation learning \and Recurrent neural network \and Attention mechanism \and Multi-language embeddings}
\end{abstract}

% Each section body lives in its own file and is pulled in with \input.
\section{Introduction}
\input{10_introduction}

\section{Related work}
\input{20_related_work}

\section{Methods}
\label{sec:methods}
\input{30_methods_intro}
\input{31_methods_seq2seq}
\input{32_methods_icd10}

\section{Experiments and Results}
\input{40_experiments}

\section{Conclusion and Future Work}
\input{50_conclusion}

\bibliography{references}
\bibliographystyle{splncs04}

\end{document}