diff --git a/code_jurica/classificationICD10_attention.py b/code_jurica/classificationICD10_attention.py
index 14cf975624d489a611cef6974c1fc42c49bea182..a82fac8be8d8908da8520e16a6adf6f229ee5815 100644
--- a/code_jurica/classificationICD10_attention.py
+++ b/code_jurica/classificationICD10_attention.py
@@ -5,11 +5,13 @@ from util import *
 import numpy as np
 import random
+import tensorflow as tf
 import traceback
 from sklearn.preprocessing import LabelEncoder
 from sklearn.model_selection import train_test_split
 from collections import Counter
+from keras import backend as K
 from keras.preprocessing.sequence import pad_sequences
 from keras.preprocessing.text import Tokenizer
 from keras.optimizers import Adam
@@ -18,22 +20,38 @@ from keras.layers import Embedding, Input, LSTM, Dense, Bidirectional
 from keras.models import Model
 from keras.utils import multi_gpu_model, np_utils
-import tensorflow as tf
-
 from _layers import AttentionWithContext, Attention
+
+#REPRODUCIBLE
+np.random.seed(42)
+import random
+random.seed(12345)
+import os
+os.environ['PYTHONHASHSEED'] = '0'
+
+import tensorflow as tf
+config = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
+from keras import backend as K
+tf.set_random_seed(1234)
+#REPRODUCIBLE
+
 ###################################
 # TensorFlow wizardry
-config=tf.ConfigProto()
+# config=tf.ConfigProto()
 
 # Don't pre-allocate memory; allocate as-needed
 config.gpu_options.allow_growth=True
 config.gpu_options.allocator_type='BFC'
 
+sess = tf.Session(graph=tf.get_default_graph(), config=config)
+K.set_session(sess)
+
 callbacks_list=[
     EarlyStopping(
         monitor='val_loss',
         patience=2,
+        min_delta=0.005
     ),
     ModelCheckpoint(
         filepath='models/icd10Classification_attention.h5',
diff --git a/code_jurica/loader.py b/code_jurica/loader.py
index a1d2bf0ccb89a44246ce439f672171e30a09ef11..eea1eaa18c58f26549d4e5bb6945bf16cd5b58e1 100644
--- a/code_jurica/loader.py
+++ b/code_jurica/loader.py
@@ -1,5 +1,4 @@
 from util import *
-import random
 import numpy as np
 from keras.preprocessing.sequence import pad_sequences
 from keras.preprocessing.text import Tokenizer
@@ -8,6 +7,14 @@ import random
 from sklearn.model_selection import train_test_split
 import pickle
 
+#REPRODUCIBLE
+np.random.seed(42)
+import random
+random.seed(12345)
+import os
+os.environ['PYTHONHASHSEED'] = '0'
+#REPRODUCIBLE
+
 kerasTokenizer = Tokenizer()
 tokenizer = TokenizePreprocessor()
 prepareData = prepareData()
diff --git a/code_jurica/seq2seq.py b/code_jurica/seq2seq.py
index 4f0e2cce590b1d4e863c5d6ec26f72f67c801d2c..fd22ce9f0089f852aefed93c4716d508b1887e43 100644
--- a/code_jurica/seq2seq.py
+++ b/code_jurica/seq2seq.py
@@ -13,14 +13,31 @@ import tqdm
 import pickle
 from sklearn.metrics import classification_report
 
+#REPRODUCIBLE
+np.random.seed(42)
+import random
+random.seed(12345)
+import os
+os.environ['PYTHONHASHSEED'] = '0'
+
+import tensorflow as tf
+config = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
+from keras import backend as K
+tf.set_random_seed(1234)
+#REPRODUCIBLE
+
+
 ###################################
 # TensorFlow wizardry
-config = tf.ConfigProto()
+# config = tf.ConfigProto()
 
 # Don't pre-allocate memory; allocate as-needed
 config.gpu_options.allow_growth = True
 config.gpu_options.allocator_type = 'BFC'
 
+sess = tf.Session(graph=tf.get_default_graph(), config=config)
+K.set_session(sess)
+
 # LOAD ICD 10 CLASSIFICATION MODEL
 try:
     icd10_model = keras_load_model('models/icd10Classification_attention.h5',
diff --git a/code_jurica/test.py b/code_jurica/test.py
index 4b2b25371a340eb13695f1a45f93feaa679bd4ca..e105d34e55059ff2ff1761cc9b9bfb2da7f0fa8d 100644
--- a/code_jurica/test.py
+++ b/code_jurica/test.py
@@ -8,6 +8,21 @@ from sklearn.metrics import classification_report
 from util import report_to_df
 import numpy as np
 
+#REPRODUCIBLE
+np.random.seed(42)
+import random
+random.seed(12345)
+import os
+os.environ['PYTHONHASHSEED'] = '0'
+
+import tensorflow as tf
+session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
+from keras import backend as K
+tf.set_random_seed(1234)
+sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
+K.set_session(sess)
+#REPRODUCIBLE
+
 with open('models/train_test_split.p', 'rb') as handle:
     data_set = pickle.load(handle)
diff --git a/code_jurica/train.sh b/code_jurica/train.sh
index 0165144f8bf038dc04af7741f8c1bbd6f4da179a..cae9235179793fdd4bd0a33cfe0ba6328816ffe4 100644
--- a/code_jurica/train.sh
+++ b/code_jurica/train.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
-#CUDA_VISIBLE_DEVICES=3 /home/sevajuri/anaconda3/bin/python3 /home/sevajuri/projects/clef18/code_jurica/classificationICD10_attention.py
+CUDA_VISIBLE_DEVICES=3 /home/sevajuri/anaconda3/bin/python3 /home/sevajuri/projects/clef18/code_jurica/classificationICD10_attention.py
 CUDA_VISIBLE_DEVICES=3 /home/sevajuri/anaconda3/bin/python3 /home/sevajuri/projects/clef18/code_jurica/seq2seq.py
 CUDA_VISIBLE_DEVICES=3 /home/sevajuri/anaconda3/bin/python3 /home/sevajuri/projects/clef18/code_jurica/test.py
diff --git a/code_jurica/util.py b/code_jurica/util.py
index f418a80ed22f72186dd9770f48807b614d240208..b96b5d4591de14c743256fb92442d8edce824ec8 100644
--- a/code_jurica/util.py
+++ b/code_jurica/util.py
@@ -16,6 +16,14 @@ import math
 import datetime
 from io import StringIO
 
+#REPRODUCIBLE
+np.random.seed(42)
+import random
+random.seed(12345)
+import os
+os.environ['PYTHONHASHSEED'] = '0'
+#REPRODUCIBLE
+
 now = datetime.datetime.now()
 date_label=now.strftime("%Y_%m_%d")
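
For reference, the #REPRODUCIBLE block repeated across the files above amounts to the setup below. This is a minimal sketch, assuming TensorFlow 1.x with standalone Keras (matching the tf.ConfigProto / K.set_session calls in the diff); the make_reproducible helper name is illustrative and not part of the repository.

import os
os.environ['PYTHONHASHSEED'] = '0'  # mirrors the diff; only affects hashing if set before interpreter start

import random
import numpy as np
import tensorflow as tf
from keras import backend as K

def make_reproducible(np_seed=42, py_seed=12345, tf_seed=1234):
    # Seed every RNG the training scripts touch (same values as the diff).
    np.random.seed(np_seed)
    random.seed(py_seed)
    tf.set_random_seed(tf_seed)

    # Single-threaded ops reduce nondeterminism from parallel reduction order;
    # allow_growth keeps TensorFlow from pre-allocating all GPU memory.
    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True

    # Register the configured session with Keras so all models use it.
    sess = tf.Session(graph=tf.get_default_graph(), config=config)
    K.set_session(sess)
    return sess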