From 2554815df28cc46b994ac17e858fa0091e6a17fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mario=20Sa=CC=88nger?= <mario.saenger@student.hu-berlin.de>
Date: Tue, 26 Jun 2018 12:00:11 +0200
Subject: [PATCH] Minor fixes to seq2seq logging

---
 .idea/deployment.xml               | 29 -----------------------------
 code_jurica/classificationICD10.py | 13 +++++++++----
 code_jurica/seq2seq.py             |  4 ++--
 code_jurica/seq2seq_attention.py   | 10 +++++-----
 code_jurica/seq2seq_base.py        |  6 +++---
 5 files changed, 19 insertions(+), 43 deletions(-)
 delete mode 100644 .idea/deployment.xml

diff --git a/.idea/deployment.xml b/.idea/deployment.xml
deleted file mode 100644
index 242dd10..0000000
--- a/.idea/deployment.xml
+++ /dev/null
@@ -1,29 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="PublishConfigData" autoUpload="Always" serverName="guppi" createEmptyFolders="true" persistUploadOnCheckin="false" autoUploadExternalChanges="true">
-    <serverData>
-      <paths name="guppi">
-        <serverdata>
-          <mappings>
-            <mapping deploy="/projects/clef18" local="$PROJECT_DIR$" web="/" />
-          </mappings>
-        </serverdata>
-      </paths>
-      <paths name="sonic">
-        <serverdata>
-          <mappings>
-            <mapping local="$PROJECT_DIR$" web="/" />
-          </mappings>
-        </serverdata>
-      </paths>
-      <paths name="vistTriton">
-        <serverdata>
-          <mappings>
-            <mapping deploy="/clef18" local="$PROJECT_DIR$" web="/" />
-          </mappings>
-        </serverdata>
-      </paths>
-    </serverData>
-    <option name="myAutoUpload" value="ALWAYS" />
-  </component>
-</project>
\ No newline at end of file
diff --git a/code_jurica/classificationICD10.py b/code_jurica/classificationICD10.py
index 4267bda..c9cdac8 100644
--- a/code_jurica/classificationICD10.py
+++ b/code_jurica/classificationICD10.py
@@ -2,6 +2,7 @@
 # experiment=Experiment(api_key="hSd9vTj0EfMu72569YnVEvtvj")
 # from loader import *
+import keras.backend as K
 from util import *
 import numpy as np
 import random
 
@@ -27,14 +28,17 @@
 config=tf.ConfigProto()
 config.gpu_options.allow_growth=True
 config.gpu_options.allocator_type='BFC'
+sess = tf.Session(graph=tf.get_default_graph(), config=config)
+K.set_session(sess)
+
 callbacks_list=[
     EarlyStopping(
         monitor='val_loss',
-        patience=2,
+        patience=50,
     ),
     ModelCheckpoint(
         filepath='models/icd10Classification.h5',
-        monitor='val_loss',
+        monitor='loss',
         save_best_only=True,
     ),
     CSVLogger(
@@ -44,7 +48,7 @@ callbacks_list=[
 ]
 
 latent_dim = 512
-epochs = 100
+epochs = 500
 batch_size = 1000
 
 tokenizer=TokenizePreprocessor()
@@ -115,7 +119,8 @@ try:
         batch_size=batch_size,
         epochs=epochs,
         callbacks=callbacks_list,
-        validation_split=0.25
+        validation_data=[X_test, Y_test]
+        #verbose=0
     )
 
 except Exception as e:
diff --git a/code_jurica/seq2seq.py b/code_jurica/seq2seq.py
index 9aef90c..496546c 100644
--- a/code_jurica/seq2seq.py
+++ b/code_jurica/seq2seq.py
@@ -48,10 +48,10 @@ except OSError:
 
 from classificationICD10 import *
 icd10_model = keras_load_model('models/icd10Classification_attention_extended.h5')
-with open('models/icd10_tokenizer_extended.p', 'rb') as handle:
+with open('models/icd10_tokenizer.p', 'rb') as handle:
     icd10Tokenizer = pickle.load(handle)
 
-with open('models/icd10_mappings_extended.p', 'rb') as handle:
+with open('models/icd10_mappings.p', 'rb') as handle:
     encoded_Y = pickle.load(handle)
 
 # LOAD ICD 10 CLASSIFICATION MODEL
diff --git a/code_jurica/seq2seq_attention.py b/code_jurica/seq2seq_attention.py
index 23bfd07..4a87398 100644
--- a/code_jurica/seq2seq_attention.py
+++ b/code_jurica/seq2seq_attention.py
@@ -45,17 +45,17 @@ except OSError:
 from classificationICD10 import *
 icd10_model = keras_load_model('models/icd10Classification_attention_extended.h5')
-with open('models/icd10_tokenizer_extended.p', 'rb') as handle:
+with open('models/icd10_tokenizer.p', 'rb') as handle:
     icd10Tokenizer = pickle.load(handle)
 
-with open('models/icd10_mappings_extended.p', 'rb') as handle:
+with open('models/icd10_mappings.p', 'rb') as handle:
     encoded_Y = pickle.load(handle)
 
 # LOAD ICD 10 CLASSIFICATION MODEL
 
 callbacks_list = [
     EarlyStopping(
         monitor='val_loss',
-        patience=2,
+        patience=5,
         # min_delta=0.001
     ),
     ModelCheckpoint(
@@ -71,7 +71,7 @@ callbacks_list = [
 
 latent_dim = 256
 batch_size = 400
-epochs = 1
+epochs = 2000
 
 train_data_generator = KerasBatchGenerator(batch_size,
                                            source_train,
@@ -237,7 +237,7 @@ run_pipeline_prediction(source_val, decode_seq, icd10_model, encoded_Y, labels_v
                         source_kerasTokenizer, source_max_sequence_tokenizer,
                         icd10Tokenizer, max_icd10_length,
                         target_index_to_word_dict, target_val,
-                        'logs/seq2seq')
+                        'logs/seq2seq-att')
 
 # y_true = []
 # y_pred = []
diff --git a/code_jurica/seq2seq_base.py b/code_jurica/seq2seq_base.py
index ca5ff4b..5f2b6ea 100644
--- a/code_jurica/seq2seq_base.py
+++ b/code_jurica/seq2seq_base.py
@@ -55,13 +55,13 @@ def run_pipeline_prediction(sentences: List[str], decode_seq_fnc: Callable, icd1
         pred_text = " ".join(translated_sent)
         pred_texts.append(pred_text)
 
-        pred_indexes = " ".join(translated_index)
+        pred_indexes = " ".join([str(i) for i in translated_index])
         pred_ids.append(pred_indexes)
 
         gold_indexes = np.trim_zeros(gold_target_indexes[seq_index], 'b')[1:-1]
-        gold_ids.append(" ".join(gold_indexes))
+        gold_ids.append(" ".join([str(i) for i in gold_indexes]))
 
-        gold_text = " ".join([target_index_to_word_dict[x] for x in gold_indexes])
+        gold_text = " ".join([target_index_to_word_dict[x] for x in gold_indexes if x in target_index_to_word_dict])
         gold_texts.append(gold_text)
 
         print('Target indexes:', gold_indexes)
--
GitLab
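
A quick illustration of why the seq2seq_base.py hunk casts the index sequences to strings and guards the index-to-word lookup: str.join() only accepts string elements, while the decoder output and the padded gold sequences hold (numpy) integers. The sketch below reuses the variable names from the patch, but the vocabulary and index values are hypothetical and only serve to demonstrate the pattern.

# Illustrative sketch, not part of the patch; all values are made up.
import numpy as np

target_index_to_word_dict = {12: 'acute', 407: 'infarction'}     # toy vocabulary
translated_index = [12, 407]                                     # decoder output: integer token ids

# " ".join(translated_index) would raise TypeError, so cast each id to str first
pred_indexes = " ".join([str(i) for i in translated_index])      # -> "12 407"

# hypothetical padded gold sequence; the [1:-1] slice in the patch suggests the
# first and last non-padding entries are start/end markers
gold_target_indexes = np.array([[1, 12, 407, 9, 2, 0, 0]])
gold_indexes = np.trim_zeros(gold_target_indexes[0], 'b')[1:-1]  # -> array([12, 407, 9])
gold_ids = " ".join([str(i) for i in gold_indexes])              # numpy ints need the cast as well

# the membership guard skips ids without a vocabulary entry (here: 9)
gold_text = " ".join([target_index_to_word_dict[x] for x in gold_indexes
                      if x in target_index_to_word_dict])        # -> "acute infarction"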