diff --git a/.sync_exclude b/.sync_exclude
new file mode 100644
index 0000000000000000000000000000000000000000..ff328a400a15755441e76c46fd8666d4fcc6e04b
--- /dev/null
+++ b/.sync_exclude
@@ -0,0 +1,11 @@
+.git
+.idea
+_env
+_eval
+_logs
+_out
+_resources/doc2vec
+_tmp
+__pycache__
+data/__pycache__
+util/__pycache__
diff --git a/.sync_ignore b/.sync_ignore
new file mode 100644
index 0000000000000000000000000000000000000000..f29e6611694ef7ba63a2441f771eb429a2d64d8a
--- /dev/null
+++ b/.sync_ignore
@@ -0,0 +1,4 @@
+_resources
+clef18/code_jurica/data
+clef18/code_mario/data
+clef18/code_mario/embeddings
diff --git a/code_jurica/seq2seq_attention.py b/code_jurica/seq2seq_attention.py
index d3688edb53cd64e07a81abec249b01713620d834..7db82f606ef275e9fa5a1300f344f126f7b2f802 100644
--- a/code_jurica/seq2seq_attention.py
+++ b/code_jurica/seq2seq_attention.py
@@ -90,7 +90,12 @@ validation_data_generator = KerasBatchGenerator(batch_size,
                                                 target_kerasTokenizer )
 
-print("Lets train some stuff!")
+
+## ------------------------------------------------------------------------------
+## Encoder-decoder training model
+
+print("Building training model")
+
 
 # Define an input sequence and process it.
 encoder_input = Input(shape=(source_max_sequence_tokenizer, ))
 x = source_embedding_layer(encoder_input)
@@ -105,9 +110,14 @@ decoder, state_h_decode , state_c_decode = decoder_LSTM(x_decode, initial_state=
 
 # Equation (7) with 'dot' score from Section 3.1 in the paper.
 # Note that we reuse Softmax-activation layer instead of writing tensor calculation
+
+print("Encoder-out: ", encoder_out)
+print("Decoder: ", decoder)
+
 attention = dot([encoder_out, decoder], axes=[2, 2])
 attention = Activation('softmax')(attention)
-context = dot([attention, encoder_out], axes=[1,1])
+
+context = dot([attention, encoder_out], axes=[1, 1])
 decoder_combined_context = concatenate([context, decoder])
 
 print(decoder_combined_context)
@@ -118,6 +128,8 @@ decoder_out = decoder_dense(decoder_combined_context) # equation (6) of the pape
 model = Model([encoder_input, decoder_input], decoder_out)
 model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
 model.summary()
+
+
 # model.fit([source_train, target_train],
 #           target_train_onehot,
 #           batch_size=batch_size,
@@ -138,11 +150,23 @@ model.fit_generator(
     # workers=10
 )
 
+## ------------------------------------------------------------------------------
 # INFERENCE MODELS
+
+
 # Encoder inference model
-encoder_model_inf = Model(encoder_input, encoder_states)
+print("Build encoder inference model")
+encoder_model_inf = Model(encoder_input, [encoder_out] + encoder_states)
+encoder_model_inf.summary()
+
+# ---------------------------------------------------------------------------------------
 # Decoder inference model
 
+
+# The encoded input line -> necessary for attention calculation!
+encoder_out_in = Input(shape=(source_max_sequence_tokenizer, 256,))
+
+# Final hidden and cell state of the encoder model -> initial state for the decoder!
 decoder_state_input_h = Input(shape=(256,))
 decoder_state_input_c = Input(shape=(256,))
 decoder_input_states = [decoder_state_input_h, decoder_state_input_c]
@@ -150,21 +174,29 @@ decoder_input_states = [decoder_state_input_h, decoder_state_input_c]
 decoder, decoder_h, decoder_c = decoder_LSTM(x_decode, initial_state=decoder_input_states)
 decoder_states = [decoder_h , decoder_c]
 
-attention = dot([encoder_out, decoder], axes=[2, 2])
+print("Encoder-out-in: ", encoder_out_in)
+print("Decoder: ", decoder)
+
+attention = dot([encoder_out_in, decoder], axes=[2, 2])
 attention = Activation('softmax')(attention)
-context = dot([attention, encoder_out], axes=[1,1])
+context = dot([attention, encoder_out_in], axes=[1, 1])
 
 print(context, decoder)
 
 decoder_combined_context = concatenate([context, decoder])
 print('decoder_combined_context\t', decoder_combined_context)
 decoder_out = decoder_dense(decoder_combined_context)
 
-decoder_model_inf = Model(inputs=[decoder_input] + decoder_input_states,
-                          outputs=[decoder_out] + decoder_states )
+
+print("Build decoder inference model")
+decoder_model_inf = Model(inputs=[decoder_input, encoder_out_in] + decoder_input_states,
+                          outputs=[decoder_out] + decoder_states)
+decoder_model_inf.summary()
 
 def decode_seq(inp_seq):
 
-    states_val = encoder_model_inf.predict(inp_seq)
+    states_out, states_h, states_c = encoder_model_inf.predict(inp_seq)
+    states_val = [states_h, states_c]
+
     print('states_val\t', states_val)
     input('inference encoder prediction\t')
@@ -177,7 +209,7 @@ def decode_seq(inp_seq):
 
     while not stop_condition:
 
-        decoder_out, decoder_h, decoder_c = decoder_model_inf.predict(x=[target_seq] + states_val)
+        decoder_out, decoder_h, decoder_c = decoder_model_inf.predict(x=[target_seq, states_out] + states_val)
 
         max_val_index = np.argmax(decoder_out[0, -1, :])
         try:
@@ -236,4 +268,4 @@ for seq_index in tqdm.tqdm(range(len(source_val))):
 report = classification_report(y_true, y_pred)
 report_df = report_to_df(report)
 report_df.to_csv('logs/classification_report_extended.csv')
-print(report_df)
\ No newline at end of file
+print(report_df)
diff --git a/code_jurica/util.py b/code_jurica/util.py
index 4eafea551ab4532de48724b663a810d3a595042c..6a8aab94a91fb373dad2e35aa8fd8d6a1ab26653 100644
--- a/code_jurica/util.py
+++ b/code_jurica/util.py
@@ -18,7 +18,7 @@ import keras
 from keras.preprocessing.sequence import pad_sequences
 from collections import Counter
 import random
-import osdf
+import os
 
 #REPRODUCIBLE
 np.random.seed(42)
@@ -330,7 +330,7 @@ class prepareData():
         random.shuffle(preparedDictionary)
         corpus = [item[0] for item in preparedDictionary]
         labels = [item[1] for item in preparedDictionary]
-        print(Counterlabels)
+        #print(Counterlabels)
         return corpus, labels
 
 class KerasBatchGenerator(keras.utils.Sequence):