diff --git a/.sync_exclude b/.sync_exclude
new file mode 100644
index 0000000000000000000000000000000000000000..ff328a400a15755441e76c46fd8666d4fcc6e04b
--- /dev/null
+++ b/.sync_exclude
@@ -0,0 +1,11 @@
+.git
+.idea
+_env
+_eval
+_logs
+_out
+_resources/doc2vec
+_tmp
+__pycache__
+data/__pycache__
+util/__pycache__
diff --git a/.sync_ignore b/.sync_ignore
new file mode 100644
index 0000000000000000000000000000000000000000..f29e6611694ef7ba63a2441f771eb429a2d64d8a
--- /dev/null
+++ b/.sync_ignore
@@ -0,0 +1,4 @@
+_resources
+clef18/code_jurica/data
+clef18/code_mario/data
+clef18/code_mario/embeddings
diff --git a/code_jurica/seq2seq_attention.py b/code_jurica/seq2seq_attention.py
index d3688edb53cd64e07a81abec249b01713620d834..7db82f606ef275e9fa5a1300f344f126f7b2f802 100644
--- a/code_jurica/seq2seq_attention.py
+++ b/code_jurica/seq2seq_attention.py
@@ -90,7 +90,12 @@ validation_data_generator = KerasBatchGenerator(batch_size,
                                                 target_kerasTokenizer )
 
-print("Lets train some stuff!")
+
+## ------------------------------------------------------------------------------
+## Encoder-decoder training model
+
+print("Building training model")
+
 
 # Define an input sequence and process it.
 encoder_input = Input(shape=(source_max_sequence_tokenizer, ))
 x = source_embedding_layer(encoder_input)
@@ -105,9 +110,14 @@ decoder, state_h_decode , state_c_decode = decoder_LSTM(x_decode, initial_state=
 
 # Equation (7) with 'dot' score from Section 3.1 in the paper.
 # Note that we reuse Softmax-activation layer instead of writing tensor calculation
+
+print("Encoder-out: ", encoder_out)
+print("Decoder: ", decoder)
+
 attention = dot([encoder_out, decoder], axes=[2, 2])
 attention = Activation('softmax')(attention)
-context = dot([attention, encoder_out], axes=[1,1])
+
+context = dot([attention, encoder_out], axes=[1, 1])
 decoder_combined_context = concatenate([context, decoder])
 
 print(decoder_combined_context)
@@ -118,6 +128,8 @@ decoder_out = decoder_dense(decoder_combined_context) # equation (6) of the pape
 model = Model([encoder_input, decoder_input], decoder_out)
 model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
 model.summary()
+
+
 # model.fit([source_train, target_train],
 #           target_train_onehot,
 #           batch_size=batch_size,
@@ -138,11 +150,23 @@ model.fit_generator(
     # workers=10
 )
 
+## ------------------------------------------------------------------------------
 # INFERENCE MODELS
+
+
 # Encoder inference model
-encoder_model_inf = Model(encoder_input, encoder_states)
+print("Build encoder inference model")
+encoder_model_inf = Model(encoder_input, [encoder_out] + encoder_states)
+encoder_model_inf.summary()
+
+# ---------------------------------------------------------------------------------------
 # Decoder inference model
 
+
+# The encoded input line -> necessary for attention calculation!
+encoder_out_in = Input(shape=(source_max_sequence_tokenizer, 256,))
+
+# Final hidden and cell state of the encoder model -> initial state for the decoder!
 decoder_state_input_h = Input(shape=(256,))
 decoder_state_input_c = Input(shape=(256,))
 decoder_input_states = [decoder_state_input_h, decoder_state_input_c]
@@ -150,21 +174,29 @@ decoder_input_states = [decoder_state_input_h, decoder_state_input_c]
 decoder, decoder_h, decoder_c = decoder_LSTM(x_decode, initial_state=decoder_input_states)
 decoder_states = [decoder_h , decoder_c]
 
-attention = dot([encoder_out, decoder], axes=[2, 2])
+print("Encoder-out-in: ", encoder_out_in)
+print("Decoder: ", decoder)
+
+attention = dot([encoder_out_in, decoder], axes=[2, 2])
 attention = Activation('softmax')(attention)
-context = dot([attention, encoder_out], axes=[1,1])
+context = dot([attention, encoder_out_in], axes=[1, 1])
 
 print(context, decoder)
 
 decoder_combined_context = concatenate([context, decoder])
 print('decoder_combined_context\t', decoder_combined_context)
 decoder_out = decoder_dense(decoder_combined_context)
 
-decoder_model_inf = Model(inputs=[decoder_input] + decoder_input_states,
-                          outputs=[decoder_out] + decoder_states )
+
+print("Build decoder inference model")
+decoder_model_inf = Model(inputs=[decoder_input, encoder_out_in] + decoder_input_states,
+                          outputs=[decoder_out] + decoder_states)
+decoder_model_inf.summary()
 
 def decode_seq(inp_seq):
 
-    states_val = encoder_model_inf.predict(inp_seq)
+    states_out, states_h, states_c = encoder_model_inf.predict(inp_seq)
+    states_val = [states_h, states_c]
+
     print('states_val\t', states_val)
     input('inference encoder prediction\t')
@@ -177,7 +209,7 @@ def decode_seq(inp_seq):
 
     while not stop_condition:
 
-        decoder_out, decoder_h, decoder_c = decoder_model_inf.predict(x=[target_seq] + states_val)
+        decoder_out, decoder_h, decoder_c = decoder_model_inf.predict(x=[target_seq, states_out] + states_val)
 
         max_val_index = np.argmax(decoder_out[0, -1, :])
         try:
@@ -236,4 +268,4 @@ for seq_index in tqdm.tqdm(range(len(source_val))):
 report = classification_report(y_true, y_pred)
 report_df = report_to_df(report)
 report_df.to_csv('logs/classification_report_extended.csv')
-print(report_df)
\ No newline at end of file
+print(report_df)
diff --git a/code_jurica/util.py b/code_jurica/util.py
index 4eafea551ab4532de48724b663a810d3a595042c..6a8aab94a91fb373dad2e35aa8fd8d6a1ab26653 100644
--- a/code_jurica/util.py
+++ b/code_jurica/util.py
@@ -18,7 +18,7 @@ import keras
 from keras.preprocessing.sequence import pad_sequences
 from collections import Counter
 import random
-import osdf
+import os
 
 #REPRODUCIBLE
 np.random.seed(42)
@@ -330,7 +330,7 @@ class prepareData():
         random.shuffle(preparedDictionary)
         corpus = [item[0] for item in preparedDictionary]
         labels = [item[1] for item in preparedDictionary]
-        print(Counterlabels)
+        #print(Counterlabels)
         return corpus, labels
 
 class KerasBatchGenerator(keras.utils.Sequence):