diff --git a/.sync_exclude b/.sync_exclude
new file mode 100644
index 0000000000000000000000000000000000000000..ff328a400a15755441e76c46fd8666d4fcc6e04b
--- /dev/null
+++ b/.sync_exclude
@@ -0,0 +1,11 @@
+.git
+.idea
+_env
+_eval
+_logs
+_out
+_resources/doc2vec
+_tmp
+__pycache__
+data/__pycache__
+util/__pycache__
diff --git a/.sync_ignore b/.sync_ignore
new file mode 100644
index 0000000000000000000000000000000000000000..f29e6611694ef7ba63a2441f771eb429a2d64d8a
--- /dev/null
+++ b/.sync_ignore
@@ -0,0 +1,4 @@
+_resources
+clef18/code_jurica/data
+clef18/code_mario/data
+clef18/code_mario/embeddings
diff --git a/code_jurica/seq2seq_attention.py b/code_jurica/seq2seq_attention.py
index 9d17d2f9f56cd8ca90229a5df5b5b6e456f8d70c..7db82f606ef275e9fa5a1300f344f126f7b2f802 100644
--- a/code_jurica/seq2seq_attention.py
+++ b/code_jurica/seq2seq_attention.py
@@ -2,7 +2,7 @@
 # experiment = Experiment(api_key="hSd9vTj0EfMu72569YnVEvtvj")
 
 from loader import *
-from _layers import AttentionWithContext, Attention, AttentionDecoder
+from _layers import AttentionWithContext, Attention
 from keras.models import Model, load_model as keras_load_model
 from keras.layers import Input, LSTM, Dense, Embedding, GRU, Activation, dot, concatenate, Bidirectional, TimeDistributed
 from keras.utils import multi_gpu_model
@@ -90,34 +90,46 @@ validation_data_generator = KerasBatchGenerator(batch_size,
                                            target_kerasTokenizer
                                            )
 
-print("Lets train some stuff!")
+
+## ------------------------------------------------------------------------------
+## Encoder-decoder training model
+
+print("Building training model")
+
 # Define an input sequence and process it.
-encoder_input = Input(shape=(source_max_sequence_tokenizer, ), name='encoder_input')
-x_encoder = source_embedding_layer(encoder_input)
-encoder_out, state_h, state_c = LSTM(latent_dim, return_sequences=True, unroll=True, return_state=True, name='encoder_lstm')(x_encoder)
+encoder_input = Input(shape=(source_max_sequence_tokenizer, ))
+x = source_embedding_layer(encoder_input)
+encoder_out, state_h, state_c = LSTM(latent_dim, return_sequences=True, unroll=True, return_state=True)(x)
 encoder_states = [state_h, state_c]
 
 # Set up the decoder, using `encoder_states` as initial state.
-decoder_input = Input(shape=(target_max_sequence_tokenizer, ), name='decoder_input')
+decoder_input = Input(shape=(target_max_sequence_tokenizer, ))
 x_decode = target_embedding_layer(decoder_input)
-decoder_LSTM = LSTM(latent_dim, return_sequences=True, return_state = True, unroll=True, name='decoder_lstm')
+decoder_LSTM = LSTM(latent_dim, return_sequences=True, return_state=True, unroll=True)
 decoder, state_h_decode , state_c_decode = decoder_LSTM(x_decode, initial_state=encoder_states)
 
 # Equation (7) with 'dot' score from Section 3.1 in the paper.
 # Note that we reuse a softmax Activation layer instead of writing the tensor calculation explicitly
-attention = dot([encoder_out, decoder], name='attention_dot' ,axes=[2, 2])
-attention = Activation('softmax', name='attention_activation')(attention)
-context = dot([attention, encoder_out], name='context_dot' ,axes=[1,1])
+
+print("Encoder-out: ", encoder_out)
+print("Decoder: ", decoder)
+
+attention = dot([encoder_out, decoder], axes=[2, 2])
+attention = Activation('softmax')(attention)
+
+context = dot([attention, encoder_out], axes=[1, 1])
 decoder_combined_context = concatenate([context, decoder])
 print(decoder_combined_context)
 
-decoder_dense = Dense(len(target_vocab)+1, activation='softmax', name='dense_output')
+decoder_dense = Dense(len(target_vocab)+1, activation='softmax')
 decoder_out = decoder_dense(decoder_combined_context) # equation (6) of the paper
 
 # MODEL
 model = Model([encoder_input, decoder_input], decoder_out)
 model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
 model.summary()
+
+
 # model.fit([source_train, target_train],
 #             target_train_onehot,
 #             batch_size=batch_size,
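
The attention block above implements Luong-style "dot" scoring: the raw scores are dot products between encoder and decoder hidden states, a softmax turns them into weights, and the weighted sum of encoder states (the context) is concatenated with the decoder state before the output softmax. A minimal NumPy sketch of the underlying per-example computation (batch dimension omitted; names, shapes, and the normalization axis here are illustrative rather than copied from the Keras graph):

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

T_enc, T_dec, d = 5, 4, 8                     # toy sequence lengths / hidden size
enc_states = np.random.rand(T_enc, d)         # encoder hidden states
dec_states = np.random.rand(T_dec, d)         # decoder hidden states

scores = enc_states @ dec_states.T            # "dot" score, eq. (7): (T_enc, T_dec)
weights = softmax(scores, axis=0)             # normalize over encoder time steps
context = weights.T @ enc_states              # context vectors: (T_dec, d)
combined = np.concatenate([context, dec_states], axis=-1)   # (T_dec, 2*d)
# `combined` is what the final softmax Dense layer consumes, giving eq. (6).
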
@@ -138,35 +150,53 @@ model.fit_generator(
     # workers=10
 )
 
+## ------------------------------------------------------------------------------
 # INFERENCE MODELS
+
+
 # Encoder inference model
-encoder_model_inf = Model(encoder_input, encoder_states)
+print("Build encoder inference model")
+encoder_model_inf = Model(encoder_input, [encoder_out] + encoder_states)
 encoder_model_inf.summary()
 
+# ---------------------------------------------------------------------------------------
+
 # Decoder inference model
-decoder_state_input_h = Input(shape=(256,), name='inference_decoder_input_h')
-decoder_state_input_c = Input(shape=(256,), name='inference_decoder_input_c')
+
+# The encoder's output sequence for the input line -> needed for the attention calculation
+encoder_out_in = Input(shape=(source_max_sequence_tokenizer, latent_dim,))
+
+# Final hidden and cell state of the encoder model -> initial state for the decoder!
+decoder_state_input_h = Input(shape=(latent_dim,))
+decoder_state_input_c = Input(shape=(latent_dim,))
 decoder_input_states = [decoder_state_input_h, decoder_state_input_c]
 
 decoder, decoder_h, decoder_c = decoder_LSTM(x_decode, initial_state=decoder_input_states)
 decoder_states = [decoder_h , decoder_c]
 
-attention = dot([encoder_out, decoder], axes=[2, 2])
+print("Encoder-out-in: ", encoder_out_in)
+print("Decoder: ", decoder)
+
+attention = dot([encoder_out_in, decoder], axes=[2, 2])
 attention = Activation('softmax')(attention)
-context = dot([attention, encoder_out], axes=[1,1])
+context = dot([attention, encoder_out_in], axes=[1, 1])
 
-# print(context, decoder)
+print(context, decoder)
 decoder_combined_context = concatenate([context, decoder])
-# print('decoder_combined_context\t', decoder_combined_context)
+print('decoder_combined_context\t', decoder_combined_context)
 
 decoder_out = decoder_dense(decoder_combined_context)
-decoder_model_inf = Model(inputs=[decoder_input] + decoder_input_states,
-                          outputs=[decoder_out] + decoder_states )
+
+print("Build decoder inference model")
+decoder_model_inf = Model(inputs=[decoder_input, encoder_out_in] + decoder_input_states,
+                          outputs=[decoder_out] + decoder_states)
 decoder_model_inf.summary()
 
 def decode_seq(inp_seq):
 
-    states_val = encoder_model_inf.predict(inp_seq)
+    states_out, states_h, states_c = encoder_model_inf.predict(inp_seq)
+    states_val = [states_h, states_c]
+
     print('states_val\t', states_val)
     input('inference encoder prediction\t')
 
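
Compared to the previous version, the decoder inference model now also consumes the encoder output sequence (encoder_out_in), because the attention weights must be recomputed at every decoding step from tensors fed in at predict time rather than borrowed from the training graph. A hedged sketch of one inference step with these two models (the all-zeros arrays are placeholders used only to illustrate the call signatures and shapes):

import numpy as np

# Hypothetical input: one padded source sequence of token indices.
inp_seq = np.zeros((1, source_max_sequence_tokenizer))

# Encoder pass: full output sequence plus the final hidden/cell state.
enc_out, h, c = encoder_model_inf.predict(inp_seq)

# Placeholder target sequence, only to illustrate the decoder call.
target_seq = np.zeros((1, target_max_sequence_tokenizer))

# One attentive decoder step: token inputs, encoder outputs, current states.
dec_out, h, c = decoder_model_inf.predict([target_seq, enc_out, h, c])
next_token_id = int(np.argmax(dec_out[0, -1, :]))   # greedy choice for the next token
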
@@ -179,7 +209,7 @@ def decode_seq(inp_seq):
 
     while not stop_condition:
 
-        decoder_out, decoder_h, decoder_c = decoder_model_inf.predict(x=[target_seq] + states_val)
+        decoder_out, decoder_h, decoder_c = decoder_model_inf.predict(x=[target_seq, states_out] + states_val)
         max_val_index = np.argmax(decoder_out[0, -1, :])
 
         try:
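
The surrounding while-loop is a standard greedy decode: feed the tokens generated so far, take the argmax over the final time step, and stop once an end-of-sequence token appears or a length limit is reached. A generic, self-contained sketch of that pattern (every name here is illustrative; the repository's decode_seq differs in its bookkeeping):

import numpy as np

def greedy_decode(step_fn, start_id, end_id, max_len):
    # step_fn(token_ids) must return a probability distribution over the
    # vocabulary for the next token, given the tokens generated so far.
    tokens = [start_id]
    while len(tokens) < max_len:
        probs = step_fn(np.array(tokens))
        next_id = int(np.argmax(probs))
        if next_id == end_id:
            break
        tokens.append(next_id)
    return tokens[1:]
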
@@ -238,4 +268,4 @@ for seq_index in tqdm.tqdm(range(len(source_val))):
 report = classification_report(y_true, y_pred)
 report_df = report_to_df(report)
 report_df.to_csv('logs/classification_report_extended.csv')
-print(report_df)
\ No newline at end of file
+print(report_df)
diff --git a/code_jurica/util.py b/code_jurica/util.py
index 4eafea551ab4532de48724b663a810d3a595042c..6a8aab94a91fb373dad2e35aa8fd8d6a1ab26653 100644
--- a/code_jurica/util.py
+++ b/code_jurica/util.py
@@ -18,7 +18,7 @@ import keras
 from keras.preprocessing.sequence import pad_sequences
 from collections import Counter
 import random
-import osdf
+import os
 
 #REPRODUCIBLE
 np.random.seed(42)
@@ -330,7 +330,7 @@ class prepareData():
         random.shuffle(preparedDictionary)
         corpus = [item[0] for item in preparedDictionary]
         labels = [item[1] for item in preparedDictionary]
-        print(Counterlabels)
+        # print(Counter(labels))
         return corpus, labels
 
 class KerasBatchGenerator(keras.utils.Sequence):
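
The training script drives model.fit_generator with KerasBatchGenerator, a keras.utils.Sequence subclass; a Sequence only has to implement __len__ (batches per epoch) and __getitem__ (one batch). A minimal generic sketch of that interface (an illustrative stand-in, not the repository's KerasBatchGenerator):

import numpy as np
import keras

class MinimalBatchGenerator(keras.utils.Sequence):
    def __init__(self, x, y, batch_size):
        self.x, self.y, self.batch_size = x, y, batch_size

    def __len__(self):
        # Number of batches per epoch.
        return int(np.ceil(len(self.x) / self.batch_size))

    def __getitem__(self, idx):
        # Return the idx-th batch as (inputs, targets).
        sl = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        return self.x[sl], self.y[sl]
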